[ANNOUNCE] 3.2.23-rt37

From: Steven Rostedt
Date: Thu Jul 19 2012 - 09:41:02 EST

Next message: Ezequiel Garcia: "[PATCH for v3.5] cx25821: Remove bad strcpy to read-only char*"
Previous message: Borislav Petkov: "Re: [PATCH] x86, mm: Send tlb flush IPIs to online cpus only"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

Dear RT Folks,

I'm pleased to announce the 3.2.23-rt37 stable release.

You can get this release via the git tree at:

git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git

Head SHA1: 32a32ba43387e6ad9dd74f38b79ad411c6c1c4e6

Or to build 3.2.23-rt37 directly, the following patches should be applied:

http://www.kernel.org/pub/linux/kernel/v3.x/linux-3.2.tar.xz

http://www.kernel.org/pub/linux/kernel/v3.x/patch-3.2.23.xz

http://www.kernel.org/pub/linux/kernel/projects/rt/3.2/patch-3.2.23-rt37.patch.xz

You can also build from 3.2.23-rt36 by applying the incremental patch:

http://www.kernel.org/pub/linux/kernel/projects/rt/3.2/incr/patch-3.2.23-rt36-rt37.patch.xz

Enjoy,

-- Steve

Changes from 3.2.23-rt36:

---

Steven Rostedt (5):
cpu/rt: Rework cpu down for PREEMPT_RT
cpu/rt: Fix cpu_hotplug variable initialization
workqueue: Revert workqueue: Fix PF_THREAD_BOUND abuse
workqueue: Revert workqueue: Fix cpuhotplug trainwreck
Linux 3.2.23-rt37

----
include/linux/cpu.h | 6 +-
include/linux/sched.h | 7 +
include/linux/workqueue.h | 5 +-
kernel/cpu.c | 240 +++++++++++++++----
kernel/sched.c | 82 ++++++-
kernel/workqueue.c | 578 ++++++++++++++++++++++++++++++++-------------
localversion-rt | 2 +-
7 files changed, 704 insertions(+), 216 deletions(-)
---------------------------
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 72e90bb..c46ec3e 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -66,10 +66,8 @@ enum {
/* migration should happen before other stuff but after perf */
CPU_PRI_PERF = 20,
CPU_PRI_MIGRATION = 10,
-
- CPU_PRI_WORKQUEUE_ACTIVE = 5, /* prepare workqueues for others */
- CPU_PRI_NORMAL = 0,
- CPU_PRI_WORKQUEUE_INACTIVE = -5, /* flush workqueues after others */
+ /* prepare workqueues for other notifiers */
+ CPU_PRI_WORKQUEUE = 5,
};

#define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0174e3a..9ca3172 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1933,6 +1933,10 @@ extern void do_set_cpus_allowed(struct task_struct *p,

extern int set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask);
+int migrate_me(void);
+void tell_sched_cpu_down_begin(int cpu);
+void tell_sched_cpu_down_done(int cpu);
+
#else
static inline void do_set_cpus_allowed(struct task_struct *p,
const struct cpumask *new_mask)
@@ -1945,6 +1949,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
return -EINVAL;
return 0;
}
+static inline int migrate_me(void) { return 0; }
+static inline void tell_sched_cpu_down_begin(int cpu) { }
+static inline void tell_sched_cpu_down_done(int cpu) { }
#endif

#ifndef CONFIG_CPUMASK_OFFSTACK
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 3d8ac9d..e228ca9 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -254,10 +254,9 @@ enum {
WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */
WQ_HIGHPRI = 1 << 4, /* high priority */
WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */
- WQ_NON_AFFINE = 1 << 6, /* free to move works around cpus */

- WQ_DRAINING = 1 << 7, /* internal: workqueue is draining */
- WQ_RESCUER = 1 << 8, /* internal: workqueue has rescuer */
+ WQ_DRAINING = 1 << 6, /* internal: workqueue is draining */
+ WQ_RESCUER = 1 << 7, /* internal: workqueue has rescuer */

WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */
WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 66dfb74..8f87b72 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -46,12 +46,7 @@ static int cpu_hotplug_disabled;

static struct {
struct task_struct *active_writer;
-#ifdef CONFIG_PREEMPT_RT_FULL
- /* Makes the lock keep the task's state */
- spinlock_t lock;
-#else
struct mutex lock; /* Synchronizes accesses to refcount, */
-#endif
/*
* Also blocks the new readers during
* an ongoing cpu hotplug operation.
@@ -59,28 +54,46 @@ static struct {
int refcount;
} cpu_hotplug = {
.active_writer = NULL,
-#ifdef CONFIG_PREEMPT_RT_FULL
- .lock = __SPIN_LOCK_UNLOCKED(cpu_hotplug.lock),
-#else
.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#endif
.refcount = 0,
};

-#ifdef CONFIG_PREEMPT_RT_FULL
-# define hotplug_lock() rt_spin_lock(&cpu_hotplug.lock)
-# define hotplug_unlock() rt_spin_unlock(&cpu_hotplug.lock)
-#else
-# define hotplug_lock() mutex_lock(&cpu_hotplug.lock)
-# define hotplug_unlock() mutex_unlock(&cpu_hotplug.lock)
-#endif
-
+/**
+ * hotplug_pcp - per cpu hotplug descriptor
+ * @unplug: set when pin_current_cpu() needs to sync tasks
+ * @sync_tsk: the task that waits for tasks to finish pinned sections
+ * @refcount: counter of tasks in pinned sections
+ * @grab_lock: set when the tasks entering pinned sections should wait
+ * @synced: notifier for @sync_tsk to tell cpu_down it's finished
+ * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
+ * @mutex_init: zero if the mutex hasn't been initialized yet.
+ *
+ * Although @unplug and @sync_tsk may point to the same task, the @unplug
+ * is used as a flag and still exists after @sync_tsk has exited and
+ * @sync_tsk set to NULL.
+ */
struct hotplug_pcp {
struct task_struct *unplug;
+ struct task_struct *sync_tsk;
int refcount;
+ int grab_lock;
struct completion synced;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ spinlock_t lock;
+#else
+ struct mutex mutex;
+#endif
+ int mutex_init;
};

+#ifdef CONFIG_PREEMPT_RT_FULL
+# define hotplug_lock(hp) rt_spin_lock(&(hp)->lock)
+# define hotplug_unlock(hp) rt_spin_unlock(&(hp)->lock)
+#else
+# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
+# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
+#endif
+
static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);

/**
@@ -94,18 +107,40 @@ static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
void pin_current_cpu(void)
{
struct hotplug_pcp *hp;
+ int force = 0;

retry:
hp = &__get_cpu_var(hotplug_pcp);

- if (!hp->unplug || hp->refcount || preempt_count() > 1 ||
+ if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
hp->unplug == current || (current->flags & PF_STOMPER)) {
hp->refcount++;
return;
}
- preempt_enable();
- hotplug_lock();
- hotplug_unlock();
+
+ if (hp->grab_lock) {
+ preempt_enable();
+ hotplug_lock(hp);
+ hotplug_unlock(hp);
+ } else {
+ preempt_enable();
+ /*
+ * Try to push this task off of this CPU.
+ */
+ if (!migrate_me()) {
+ preempt_disable();
+ hp = &__get_cpu_var(hotplug_pcp);
+ if (!hp->grab_lock) {
+ /*
+ * Just let it continue it's already pinned
+ * or about to sleep.
+ */
+ force = 1;
+ goto retry;
+ }
+ preempt_enable();
+ }
+ }
preempt_disable();
goto retry;
}
@@ -127,26 +162,84 @@ void unpin_current_cpu(void)
wake_up_process(hp->unplug);
}

-/*
- * FIXME: Is this really correct under all circumstances ?
- */
+static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
+{
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ while (hp->refcount) {
+ schedule_preempt_disabled();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+}
+
static int sync_unplug_thread(void *data)
{
struct hotplug_pcp *hp = data;

preempt_disable();
hp->unplug = current;
+ wait_for_pinned_cpus(hp);
+
+ /*
+ * This thread will synchronize the cpu_down() with threads
+ * that have pinned the CPU. When the pinned CPU count reaches
+ * zero, we inform the cpu_down code to continue to the next step.
+ */
set_current_state(TASK_UNINTERRUPTIBLE);
- while (hp->refcount) {
- schedule_preempt_disabled();
+ preempt_enable();
+ complete(&hp->synced);
+
+ /*
+ * If all succeeds, the next step will need tasks to wait till
+ * the CPU is offline before continuing. To do this, the grab_lock
+ * is set and tasks going into pin_current_cpu() will block on the
+ * mutex. But we still need to wait for those that are already in
+ * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
+ * will kick this thread out.
+ */
+ while (!hp->grab_lock && !kthread_should_stop()) {
+ schedule();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+
+ /* Make sure grab_lock is seen before we see a stale completion */
+ smp_mb();
+
+ /*
+ * Now just before cpu_down() enters stop machine, we need to make
+ * sure all tasks that are in pinned CPU sections are out, and new
+ * tasks will now grab the lock, keeping them from entering pinned
+ * CPU sections.
+ */
+ if (!kthread_should_stop()) {
+ preempt_disable();
+ wait_for_pinned_cpus(hp);
+ preempt_enable();
+ complete(&hp->synced);
+ }
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ while (!kthread_should_stop()) {
+ schedule();
set_current_state(TASK_UNINTERRUPTIBLE);
}
set_current_state(TASK_RUNNING);
- preempt_enable();
- complete(&hp->synced);
+
+ /*
+ * Force this thread off this CPU as it's going down and
+ * we don't want any more work on this CPU.
+ */
+ current->flags &= ~PF_THREAD_BOUND;
+ do_set_cpus_allowed(current, cpu_present_mask);
+ migrate_me();
return 0;
}

+static void __cpu_unplug_sync(struct hotplug_pcp *hp)
+{
+ wake_up_process(hp->sync_tsk);
+ wait_for_completion(&hp->synced);
+}
+
/*
* Start the sync_unplug_thread on the target cpu and wait for it to
* complete.
@@ -154,23 +247,83 @@ static int sync_unplug_thread(void *data)
static int cpu_unplug_begin(unsigned int cpu)
{
struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
- struct task_struct *tsk;
+ int err;
+
+ /* Protected by cpu_hotplug.lock */
+ if (!hp->mutex_init) {
+#ifdef CONFIG_PREEMPT_RT_FULL
+ spin_lock_init(&hp->lock);
+#else
+ mutex_init(&hp->mutex);
+#endif
+ hp->mutex_init = 1;
+ }
+
+ /* Inform the scheduler to migrate tasks off this CPU */
+ tell_sched_cpu_down_begin(cpu);

init_completion(&hp->synced);
- tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
- if (IS_ERR(tsk))
- return (PTR_ERR(tsk));
- kthread_bind(tsk, cpu);
- wake_up_process(tsk);
- wait_for_completion(&hp->synced);
+
+ hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
+ if (IS_ERR(hp->sync_tsk)) {
+ err = PTR_ERR(hp->sync_tsk);
+ hp->sync_tsk = NULL;
+ return err;
+ }
+ kthread_bind(hp->sync_tsk, cpu);
+
+ /*
+ * Wait for tasks to get out of the pinned sections,
+ * it's still OK if new tasks enter. Some CPU notifiers will
+ * wait for tasks that are going to enter these sections and
+ * we must not have them block.
+ */
+ __cpu_unplug_sync(hp);
+
return 0;
}

+static void cpu_unplug_sync(unsigned int cpu)
+{
+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+
+ init_completion(&hp->synced);
+ /* The completion needs to be initialzied before setting grab_lock */
+ smp_wmb();
+
+ /* Grab the mutex before setting grab_lock */
+ hotplug_lock(hp);
+ hp->grab_lock = 1;
+
+ /*
+ * The CPU notifiers have been completed.
+ * Wait for tasks to get out of pinned CPU sections and have new
+ * tasks block until the CPU is completely down.
+ */
+ __cpu_unplug_sync(hp);
+
+ /* All done with the sync thread */
+ kthread_stop(hp->sync_tsk);
+ hp->sync_tsk = NULL;
+}
+
static void cpu_unplug_done(unsigned int cpu)
{
struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);

hp->unplug = NULL;
+ /* Let all tasks know cpu unplug is finished before cleaning up */
+ smp_wmb();
+
+ if (hp->sync_tsk)
+ kthread_stop(hp->sync_tsk);
+
+ if (hp->grab_lock) {
+ hotplug_unlock(hp);
+ /* protected by cpu_hotplug.lock */
+ hp->grab_lock = 0;
+ }
+ tell_sched_cpu_down_done(cpu);
}

void get_online_cpus(void)
@@ -178,9 +331,9 @@ void get_online_cpus(void)
might_sleep();
if (cpu_hotplug.active_writer == current)
return;
- hotplug_lock();
+ mutex_lock(&cpu_hotplug.lock);
cpu_hotplug.refcount++;
- hotplug_unlock();
+ mutex_unlock(&cpu_hotplug.lock);

}
EXPORT_SYMBOL_GPL(get_online_cpus);
@@ -189,10 +342,10 @@ void put_online_cpus(void)
{
if (cpu_hotplug.active_writer == current)
return;
- hotplug_lock();
+ mutex_lock(&cpu_hotplug.lock);
if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
wake_up_process(cpu_hotplug.active_writer);
- hotplug_unlock();
+ mutex_unlock(&cpu_hotplug.lock);

}
EXPORT_SYMBOL_GPL(put_online_cpus);
@@ -224,11 +377,11 @@ static void cpu_hotplug_begin(void)
cpu_hotplug.active_writer = current;

for (;;) {
- hotplug_lock();
+ mutex_lock(&cpu_hotplug.lock);
if (likely(!cpu_hotplug.refcount))
break;
__set_current_state(TASK_UNINTERRUPTIBLE);
- hotplug_unlock();
+ mutex_unlock(&cpu_hotplug.lock);
schedule();
}
}
@@ -236,7 +389,7 @@ static void cpu_hotplug_begin(void)
static void cpu_hotplug_done(void)
{
cpu_hotplug.active_writer = NULL;
- hotplug_unlock();
+ mutex_unlock(&cpu_hotplug.lock);
}

#else /* #if CONFIG_HOTPLUG_CPU */
@@ -371,6 +524,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
goto out_release;
}

+ /* Notifiers are done. Don't let any more tasks pin this CPU. */
+ cpu_unplug_sync(cpu);
+
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
/* CPU didn't die: tell everyone. Can't complain. */
diff --git a/kernel/sched.c b/kernel/sched.c
index 4dd1fff..bc2375e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4403,7 +4403,7 @@ void migrate_disable(void)
{
struct task_struct *p = current;

- if (in_atomic() || p->flags & PF_THREAD_BOUND) {
+ if (in_atomic()) {
#ifdef CONFIG_SCHED_DEBUG
p->migrate_disable_atomic++;
#endif
@@ -4434,7 +4434,7 @@ void migrate_enable(void)
unsigned long flags;
struct rq *rq;

- if (in_atomic() || p->flags & PF_THREAD_BOUND) {
+ if (in_atomic()) {
#ifdef CONFIG_SCHED_DEBUG
p->migrate_disable_atomic--;
#endif
@@ -6360,6 +6360,84 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
cpumask_copy(&p->cpus_allowed, new_mask);
}

+static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
+static DEFINE_MUTEX(sched_down_mutex);
+static cpumask_t sched_down_cpumask;
+
+void tell_sched_cpu_down_begin(int cpu)
+{
+ mutex_lock(&sched_down_mutex);
+ cpumask_set_cpu(cpu, &sched_down_cpumask);
+ mutex_unlock(&sched_down_mutex);
+}
+
+void tell_sched_cpu_down_done(int cpu)
+{
+ mutex_lock(&sched_down_mutex);
+ cpumask_clear_cpu(cpu, &sched_down_cpumask);
+ mutex_unlock(&sched_down_mutex);
+}
+
+/**
+ * migrate_me - try to move the current task off this cpu
+ *
+ * Used by the pin_current_cpu() code to try to get tasks
+ * to move off the current CPU as it is going down.
+ * It will only move the task if the task isn't pinned to
+ * the CPU (with migrate_disable, affinity or THREAD_BOUND)
+ * and the task has to be in a RUNNING state. Otherwise the
+ * movement of the task will wake it up (change its state
+ * to running) when the task did not expect it.
+ *
+ * Returns 1 if it succeeded in moving the current task
+ * 0 otherwise.
+ */
+int migrate_me(void)
+{
+ struct task_struct *p = current;
+ struct migration_arg arg;
+ struct cpumask *cpumask;
+ struct cpumask *mask;
+ unsigned long flags;
+ unsigned int dest_cpu;
+ struct rq *rq;
+
+ /*
+ * We can not migrate tasks bounded to a CPU or tasks not
+ * running. The movement of the task will wake it up.
+ */
+ if (p->flags & PF_THREAD_BOUND || p->state)
+ return 0;
+
+ mutex_lock(&sched_down_mutex);
+ rq = task_rq_lock(p, &flags);
+
+ cpumask = &__get_cpu_var(sched_cpumasks);
+ mask = &p->cpus_allowed;
+
+ cpumask_andnot(cpumask, mask, &sched_down_cpumask);
+
+ if (!cpumask_weight(cpumask)) {
+ /* It's only on this CPU? */
+ task_rq_unlock(rq, p, &flags);
+ mutex_unlock(&sched_down_mutex);
+ return 0;
+ }
+
+ dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
+
+ arg.task = p;
+ arg.dest_cpu = dest_cpu;
+
+ task_rq_unlock(rq, p, &flags);
+
+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+ tlb_migrate_finish(p->mm);
+ mutex_unlock(&sched_down_mutex);
+
+ return 1;
+}
+
/*
* This is how migration works:
*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 674d783..5d23c05b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,7 +41,6 @@
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
-#include <linux/delay.h>

#include "workqueue_sched.h"

@@ -58,10 +57,20 @@ enum {
WORKER_DIE = 1 << 1, /* die die die */
WORKER_IDLE = 1 << 2, /* is idle */
WORKER_PREP = 1 << 3, /* preparing to run works */
- WORKER_CPU_INTENSIVE = 1 << 4, /* cpu intensive */
- WORKER_UNBOUND = 1 << 5, /* worker is unbound */
+ WORKER_ROGUE = 1 << 4, /* not bound to any cpu */
+ WORKER_REBIND = 1 << 5, /* mom is home, come back */
+ WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */
+ WORKER_UNBOUND = 1 << 7, /* worker is unbound */

- WORKER_NOT_RUNNING = WORKER_PREP | WORKER_CPU_INTENSIVE | WORKER_UNBOUND,
+ WORKER_NOT_RUNNING = WORKER_PREP | WORKER_ROGUE | WORKER_REBIND |
+ WORKER_CPU_INTENSIVE | WORKER_UNBOUND,
+
+ /* gcwq->trustee_state */
+ TRUSTEE_START = 0, /* start */
+ TRUSTEE_IN_CHARGE = 1, /* trustee in charge of gcwq */
+ TRUSTEE_BUTCHER = 2, /* butcher workers */
+ TRUSTEE_RELEASE = 3, /* release workers */
+ TRUSTEE_DONE = 4, /* trustee is done */

BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER,
@@ -75,6 +84,7 @@ enum {
(min two ticks) */
MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */
CREATE_COOLDOWN = HZ, /* time to breath after fail */
+ TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */

/*
* Rescue workers are used only on emergencies and shared by
@@ -126,6 +136,7 @@ struct worker {
unsigned long last_active; /* L: last active timestamp */
unsigned int flags; /* X: flags */
int id; /* I: worker id */
+ struct work_struct rebind_work; /* L: rebind worker to cpu */
int sleeping; /* None */
};

@@ -153,8 +164,10 @@ struct global_cwq {

struct ida worker_ida; /* L: for worker IDs */

+ struct task_struct *trustee; /* L: for gcwq shutdown */
+ unsigned int trustee_state; /* L: trustee state */
+ wait_queue_head_t trustee_wait; /* trustee wait */
struct worker *first_idle; /* L: first idle worker */
- wait_queue_head_t idle_wait;
} ____cacheline_aligned_in_smp;

/*
@@ -961,38 +974,13 @@ static bool is_chained_work(struct workqueue_struct *wq)
return false;
}

-static void ___queue_work(struct workqueue_struct *wq, struct global_cwq *gcwq,
- struct work_struct *work)
-{
- struct cpu_workqueue_struct *cwq;
- struct list_head *worklist;
- unsigned int work_flags;
-
- /* gcwq determined, get cwq and queue */
- cwq = get_cwq(gcwq->cpu, wq);
- trace_workqueue_queue_work(gcwq->cpu, cwq, work);
-
- BUG_ON(!list_empty(&work->entry));
-
- cwq->nr_in_flight[cwq->work_color]++;
- work_flags = work_color_to_flags(cwq->work_color);
-
- if (likely(cwq->nr_active < cwq->max_active)) {
- trace_workqueue_activate_work(work);
- cwq->nr_active++;
- worklist = gcwq_determine_ins_pos(gcwq, cwq);
- } else {
- work_flags |= WORK_STRUCT_DELAYED;
- worklist = &cwq->delayed_works;
- }
-
- insert_work(cwq, work, worklist, work_flags);
-}
-
static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
struct work_struct *work)
{
struct global_cwq *gcwq;
+ struct cpu_workqueue_struct *cwq;
+ struct list_head *worklist;
+ unsigned int work_flags;
unsigned long flags;

debug_work_activate(work);
@@ -1038,32 +1026,27 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
spin_lock_irqsave(&gcwq->lock, flags);
}

- ___queue_work(wq, gcwq, work);
+ /* gcwq determined, get cwq and queue */
+ cwq = get_cwq(gcwq->cpu, wq);
+ trace_workqueue_queue_work(cpu, cwq, work);

- spin_unlock_irqrestore(&gcwq->lock, flags);
-}
+ BUG_ON(!list_empty(&work->entry));

-/**
- * queue_work_on - queue work on specific cpu
- * @cpu: CPU number to execute work on
- * @wq: workqueue to use
- * @work: work to queue
- *
- * Returns 0 if @work was already on a queue, non-zero otherwise.
- *
- * We queue the work to a specific CPU, the caller must ensure it
- * can't go away.
- */
-static int
-__queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
-{
- int ret = 0;
+ cwq->nr_in_flight[cwq->work_color]++;
+ work_flags = work_color_to_flags(cwq->work_color);

- if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
- __queue_work(cpu, wq, work);
- ret = 1;
+ if (likely(cwq->nr_active < cwq->max_active)) {
+ trace_workqueue_activate_work(work);
+ cwq->nr_active++;
+ worklist = gcwq_determine_ins_pos(gcwq, cwq);
+ } else {
+ work_flags |= WORK_STRUCT_DELAYED;
+ worklist = &cwq->delayed_works;
}
- return ret;
+
+ insert_work(cwq, work, worklist, work_flags);
+
+ spin_unlock_irqrestore(&gcwq->lock, flags);
}

/**
@@ -1080,19 +1063,34 @@ int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
int ret;

- ret = __queue_work_on(get_cpu_light(), wq, work);
+ ret = queue_work_on(get_cpu_light(), wq, work);
put_cpu_light();

return ret;
}
EXPORT_SYMBOL_GPL(queue_work);

+/**
+ * queue_work_on - queue work on specific cpu
+ * @cpu: CPU number to execute work on
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
+ *
+ * We queue the work to a specific CPU, the caller must ensure it
+ * can't go away.
+ */
int
queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
- WARN_ON(wq->flags & WQ_NON_AFFINE);
+ int ret = 0;

- return __queue_work_on(cpu, wq, work);
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+ __queue_work(cpu, wq, work);
+ ret = 1;
+ }
+ return ret;
}
EXPORT_SYMBOL_GPL(queue_work_on);

@@ -1138,8 +1136,6 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct timer_list *timer = &dwork->timer;
struct work_struct *work = &dwork->work;

- WARN_ON((wq->flags & WQ_NON_AFFINE) && cpu != -1);
-
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
unsigned int lcpu;

@@ -1205,13 +1201,12 @@ static void worker_enter_idle(struct worker *worker)
/* idle_list is LIFO */
list_add(&worker->entry, &gcwq->idle_list);

- if (gcwq->nr_idle == gcwq->nr_workers)
- wake_up_all(&gcwq->idle_wait);
-
- if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer)) {
- mod_timer(&gcwq->idle_timer,
- jiffies + IDLE_WORKER_TIMEOUT);
- }
+ if (likely(!(worker->flags & WORKER_ROGUE))) {
+ if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer))
+ mod_timer(&gcwq->idle_timer,
+ jiffies + IDLE_WORKER_TIMEOUT);
+ } else
+ wake_up_all(&gcwq->trustee_wait);

/* sanity check nr_running */
WARN_ON_ONCE(gcwq->nr_workers == gcwq->nr_idle &&
@@ -1288,14 +1283,8 @@ __acquires(&gcwq->lock)
return false;
if (task_cpu(task) == gcwq->cpu &&
cpumask_equal(&current->cpus_allowed,
- get_cpu_mask(gcwq->cpu))) {
- /*
- * Since we're binding to a particular cpu and need to
- * stay there for correctness, mark us PF_THREAD_BOUND.
- */
- task->flags |= PF_THREAD_BOUND;
+ get_cpu_mask(gcwq->cpu)))
return true;
- }
spin_unlock_irq(&gcwq->lock);

/*
@@ -1309,15 +1298,20 @@ __acquires(&gcwq->lock)
}
}

-static void worker_unbind_and_unlock(struct worker *worker)
+/*
+ * Function for worker->rebind_work used to rebind rogue busy workers
+ * to the associated cpu which is coming back online. This is
+ * scheduled by cpu up but can race with other cpu hotplug operations
+ * and may be executed twice without intervening cpu down.
+ */
+static void worker_rebind_fn(struct work_struct *work)
{
+ struct worker *worker = container_of(work, struct worker, rebind_work);
struct global_cwq *gcwq = worker->gcwq;
- struct task_struct *task = worker->task;

- /*
- * Its no longer required we're PF_THREAD_BOUND, the work is done.
- */
- task->flags &= ~PF_THREAD_BOUND;
+ if (worker_maybe_bind_and_lock(worker))
+ worker_clr_flags(worker, WORKER_REBIND);
+
spin_unlock_irq(&gcwq->lock);
}

@@ -1329,6 +1323,7 @@ static struct worker *alloc_worker(void)
if (worker) {
INIT_LIST_HEAD(&worker->entry);
INIT_LIST_HEAD(&worker->scheduled);
+ INIT_WORK(&worker->rebind_work, worker_rebind_fn);
/* on creation a worker is in !idle && prep state */
worker->flags = WORKER_PREP;
}
@@ -1383,9 +1378,15 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind)
if (IS_ERR(worker->task))
goto fail;

+ /*
+ * A rogue worker will become a regular one if CPU comes
+ * online later on. Make sure every worker has
+ * PF_THREAD_BOUND set.
+ */
if (bind && !on_unbound_cpu)
kthread_bind(worker->task, gcwq->cpu);
else {
+ worker->task->flags |= PF_THREAD_BOUND;
if (on_unbound_cpu)
worker->flags |= WORKER_UNBOUND;
}
@@ -1662,6 +1663,13 @@ static bool manage_workers(struct worker *worker)

gcwq->flags &= ~GCWQ_MANAGING_WORKERS;

+ /*
+ * The trustee might be waiting to take over the manager
+ * position, tell it we're done.
+ */
+ if (unlikely(gcwq->trustee))
+ wake_up_all(&gcwq->trustee_wait);
+
return ret;
}

@@ -2062,7 +2070,7 @@ repeat:
if (keep_working(gcwq))
wake_up_worker(gcwq);

- worker_unbind_and_unlock(rescuer);
+ spin_unlock_irq(&gcwq->lock);
}

schedule();
@@ -3011,6 +3019,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
if (IS_ERR(rescuer->task))
goto err;

+ rescuer->task->flags |= PF_THREAD_BOUND;
wake_up_process(rescuer->task);
}

@@ -3200,76 +3209,171 @@ EXPORT_SYMBOL_GPL(work_busy);
* gcwqs serve mix of short, long and very long running works making
* blocked draining impractical.
*
+ * This is solved by allowing a gcwq to be detached from CPU, running
+ * it with unbound (rogue) workers and allowing it to be reattached
+ * later if the cpu comes back online. A separate thread is created
+ * to govern a gcwq in such state and is called the trustee of the
+ * gcwq.
+ *
+ * Trustee states and their descriptions.
+ *
+ * START Command state used on startup. On CPU_DOWN_PREPARE, a
+ * new trustee is started with this state.
+ *
+ * IN_CHARGE Once started, trustee will enter this state after
+ * assuming the manager role and making all existing
+ * workers rogue. DOWN_PREPARE waits for trustee to
+ * enter this state. After reaching IN_CHARGE, trustee
+ * tries to execute the pending worklist until it's empty
+ * and the state is set to BUTCHER, or the state is set
+ * to RELEASE.
+ *
+ * BUTCHER Command state which is set by the cpu callback after
+ * the cpu has went down. Once this state is set trustee
+ * knows that there will be no new works on the worklist
+ * and once the worklist is empty it can proceed to
+ * killing idle workers.
+ *
+ * RELEASE Command state which is set by the cpu callback if the
+ * cpu down has been canceled or it has come online
+ * again. After recognizing this state, trustee stops
+ * trying to drain or butcher and clears ROGUE, rebinds
+ * all remaining workers back to the cpu and releases
+ * manager role.
+ *
+ * DONE Trustee will enter this state after BUTCHER or RELEASE
+ * is complete.
+ *
+ * trustee CPU draining
+ * took over down complete
+ * START -----------> IN_CHARGE -----------> BUTCHER -----------> DONE
+ * | | ^
+ * | CPU is back online v return workers |
+ * ----------------> RELEASE --------------
*/

-static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
-{
- unsigned int cpu = (unsigned long)hcpu;
- struct global_cwq *gcwq = get_gcwq(cpu);
- struct worker *uninitialized_var(new_worker);
- unsigned long flags;
+/**
+ * trustee_wait_event_timeout - timed event wait for trustee
+ * @cond: condition to wait for
+ * @timeout: timeout in jiffies
+ *
+ * wait_event_timeout() for trustee to use. Handles locking and
+ * checks for RELEASE request.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock) which may be released and regrabbed
+ * multiple times. To be used by trustee.
+ *
+ * RETURNS:
+ * Positive indicating left time if @cond is satisfied, 0 if timed
+ * out, -1 if canceled.
+ */
+#define trustee_wait_event_timeout(cond, timeout) ({ \
+ long __ret = (timeout); \
+ while (!((cond) || (gcwq->trustee_state == TRUSTEE_RELEASE)) && \
+ __ret) { \
+ spin_unlock_irq(&gcwq->lock); \
+ __wait_event_timeout(gcwq->trustee_wait, (cond) || \
+ (gcwq->trustee_state == TRUSTEE_RELEASE), \
+ __ret); \
+ spin_lock_irq(&gcwq->lock); \
+ } \
+ gcwq->trustee_state == TRUSTEE_RELEASE ? -1 : (__ret); \
+})

- action &= ~CPU_TASKS_FROZEN;
+/**
+ * trustee_wait_event - event wait for trustee
+ * @cond: condition to wait for
+ *
+ * wait_event() for trustee to use. Automatically handles locking and
+ * checks for CANCEL request.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock) which may be released and regrabbed
+ * multiple times. To be used by trustee.
+ *
+ * RETURNS:
+ * 0 if @cond is satisfied, -1 if canceled.
+ */
+#define trustee_wait_event(cond) ({ \
+ long __ret1; \
+ __ret1 = trustee_wait_event_timeout(cond, MAX_SCHEDULE_TIMEOUT);\
+ __ret1 < 0 ? -1 : 0; \
+})

- switch (action) {
- case CPU_UP_PREPARE:
- BUG_ON(gcwq->first_idle);
- new_worker = create_worker(gcwq, false);
- if (!new_worker)
- return NOTIFY_BAD;
- case CPU_UP_CANCELED:
- case CPU_ONLINE:
- break;
- default:
- return notifier_from_errno(0);
- }
+static int __cpuinit trustee_thread(void *__gcwq)
+{
+ struct global_cwq *gcwq = __gcwq;
+ struct worker *worker;
+ struct work_struct *work;
+ struct hlist_node *pos;
+ long rc;
+ int i;

- /* some are called w/ irq disabled, don't disturb irq status */
- spin_lock_irqsave(&gcwq->lock, flags);
+ BUG_ON(gcwq->cpu != smp_processor_id());

- switch (action) {
- case CPU_UP_PREPARE:
- BUG_ON(gcwq->first_idle);
- gcwq->first_idle = new_worker;
- break;
+ spin_lock_irq(&gcwq->lock);
+ /*
+ * Claim the manager position and make all workers rogue.
+ * Trustee must be bound to the target cpu and can't be
+ * cancelled.
+ */
+ BUG_ON(gcwq->cpu != smp_processor_id());
+ rc = trustee_wait_event(!(gcwq->flags & GCWQ_MANAGING_WORKERS));
+ BUG_ON(rc < 0);

- case CPU_UP_CANCELED:
- destroy_worker(gcwq->first_idle);
- gcwq->first_idle = NULL;
- break;
+ gcwq->flags |= GCWQ_MANAGING_WORKERS;

- case CPU_ONLINE:
- spin_unlock_irq(&gcwq->lock);
- kthread_bind(gcwq->first_idle->task, cpu);
- spin_lock_irq(&gcwq->lock);
- gcwq->flags |= GCWQ_MANAGE_WORKERS;
- start_worker(gcwq->first_idle);
- gcwq->first_idle = NULL;
- break;
- }
+ list_for_each_entry(worker, &gcwq->idle_list, entry)
+ worker->flags |= WORKER_ROGUE;

- spin_unlock_irqrestore(&gcwq->lock, flags);
+ for_each_busy_worker(worker, i, pos, gcwq)
+ worker->flags |= WORKER_ROGUE;

- return notifier_from_errno(0);
-}
+ /*
+ * Call schedule() so that we cross rq->lock and thus can
+ * guarantee sched callbacks see the rogue flag. This is
+ * necessary as scheduler callbacks may be invoked from other
+ * cpus.
+ */
+ spin_unlock_irq(&gcwq->lock);
+ schedule();
+ spin_lock_irq(&gcwq->lock);

-static void flush_gcwq(struct global_cwq *gcwq)
-{
- struct work_struct *work, *nw;
- struct worker *worker, *n;
- LIST_HEAD(non_affine_works);
+ /*
+ * Sched callbacks are disabled now. Zap nr_running. After
+ * this, nr_running stays zero and need_more_worker() and
+ * keep_working() are always true as long as the worklist is
+ * not empty.
+ */
+ atomic_set(get_gcwq_nr_running(gcwq->cpu), 0);

+ spin_unlock_irq(&gcwq->lock);
+ del_timer_sync(&gcwq->idle_timer);
spin_lock_irq(&gcwq->lock);
- list_for_each_entry_safe(work, nw, &gcwq->worklist, entry) {
- struct workqueue_struct *wq = get_work_cwq(work)->wq;

- if (wq->flags & WQ_NON_AFFINE)
- list_move(&work->entry, &non_affine_works);
- }
+ /*
+ * We're now in charge. Notify and proceed to drain. We need
+ * to keep the gcwq running during the whole CPU down
+ * procedure as other cpu hotunplug callbacks may need to
+ * flush currently running tasks.
+ */
+ gcwq->trustee_state = TRUSTEE_IN_CHARGE;
+ wake_up_all(&gcwq->trustee_wait);

- while (!list_empty(&gcwq->worklist)) {
+ /*
+ * The original cpu is in the process of dying and may go away
+ * anytime now. When that happens, we and all workers would
+ * be migrated to other cpus. Try draining any left work. We
+ * want to get it over with ASAP - spam rescuers, wake up as
+ * many idlers as necessary and create new ones till the
+ * worklist is empty. Note that if the gcwq is frozen, there
+ * may be frozen works in freezable cwqs. Don't declare
+ * completion while frozen.
+ */
+ while (gcwq->nr_workers != gcwq->nr_idle ||
+ gcwq->flags & GCWQ_FREEZING ||
+ gcwq->trustee_state == TRUSTEE_IN_CHARGE) {
int nr_works = 0;

list_for_each_entry(work, &gcwq->worklist, entry) {
@@ -3283,55 +3387,200 @@ static void flush_gcwq(struct global_cwq *gcwq)
wake_up_process(worker->task);
}

- spin_unlock_irq(&gcwq->lock);
-
if (need_to_create_worker(gcwq)) {
- worker = create_worker(gcwq, true);
- if (worker)
+ spin_unlock_irq(&gcwq->lock);
+ worker = create_worker(gcwq, false);
+ spin_lock_irq(&gcwq->lock);
+ if (worker) {
+ worker->flags |= WORKER_ROGUE;
start_worker(worker);
+ }
}

- wait_event_timeout(gcwq->idle_wait,
- gcwq->nr_idle == gcwq->nr_workers, HZ/10);
-
- spin_lock_irq(&gcwq->lock);
+ /* give a breather */
+ if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0)
+ break;
}

- WARN_ON(gcwq->nr_workers != gcwq->nr_idle);
+ /*
+ * Either all works have been scheduled and cpu is down, or
+ * cpu down has already been canceled. Wait for and butcher
+ * all workers till we're canceled.
+ */
+ do {
+ rc = trustee_wait_event(!list_empty(&gcwq->idle_list));
+ while (!list_empty(&gcwq->idle_list))
+ destroy_worker(list_first_entry(&gcwq->idle_list,
+ struct worker, entry));
+ } while (gcwq->nr_workers && rc >= 0);

- list_for_each_entry_safe(worker, n, &gcwq->idle_list, entry)
- destroy_worker(worker);
+ /*
+ * At this point, either draining has completed and no worker
+ * is left, or cpu down has been canceled or the cpu is being
+ * brought back up. There shouldn't be any idle one left.
+ * Tell the remaining busy ones to rebind once it finishes the
+ * currently scheduled works by scheduling the rebind_work.
+ */
+ WARN_ON(!list_empty(&gcwq->idle_list));

- WARN_ON(gcwq->nr_workers || gcwq->nr_idle);
+ for_each_busy_worker(worker, i, pos, gcwq) {
+ struct work_struct *rebind_work = &worker->rebind_work;

- spin_unlock_irq(&gcwq->lock);
+ /*
+ * Rebind_work may race with future cpu hotplug
+ * operations. Use a separate flag to mark that
+ * rebinding is scheduled.
+ */
+ worker->flags |= WORKER_REBIND;
+ worker->flags &= ~WORKER_ROGUE;

- gcwq = get_gcwq(get_cpu_light());
- spin_lock_irq(&gcwq->lock);
- list_for_each_entry_safe(work, nw, &non_affine_works, entry) {
- list_del_init(&work->entry);
- ___queue_work(get_work_cwq(work)->wq, gcwq, work);
+ /* queue rebind_work, wq doesn't matter, use the default one */
+ if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
+ work_data_bits(rebind_work)))
+ continue;
+
+ debug_work_activate(rebind_work);
+ insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
+ worker->scheduled.next,
+ work_color_to_flags(WORK_NO_COLOR));
}
+
+ /* relinquish manager role */
+ gcwq->flags &= ~GCWQ_MANAGING_WORKERS;
+
+ /* notify completion */
+ gcwq->trustee = NULL;
+ gcwq->trustee_state = TRUSTEE_DONE;
+ wake_up_all(&gcwq->trustee_wait);
spin_unlock_irq(&gcwq->lock);
- put_cpu_light();
+ return 0;
}

-static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb,
+/**
+ * wait_trustee_state - wait for trustee to enter the specified state
+ * @gcwq: gcwq the trustee of interest belongs to
+ * @state: target state to wait for
+ *
+ * Wait for the trustee to reach @state. DONE is already matched.
+ *
+ * CONTEXT:
+ * spin_lock_irq(gcwq->lock) which may be released and regrabbed
+ * multiple times. To be used by cpu_callback.
+ */
+static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state)
+__releases(&gcwq->lock)
+__acquires(&gcwq->lock)
+{
+ if (!(gcwq->trustee_state == state ||
+ gcwq->trustee_state == TRUSTEE_DONE)) {
+ spin_unlock_irq(&gcwq->lock);
+ __wait_event(gcwq->trustee_wait,
+ gcwq->trustee_state == state ||
+ gcwq->trustee_state == TRUSTEE_DONE);
+ spin_lock_irq(&gcwq->lock);
+ }
+}
+
+static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct global_cwq *gcwq = get_gcwq(cpu);
+ struct task_struct *new_trustee = NULL;
+ struct worker *uninitialized_var(new_worker);
+ unsigned long flags;

action &= ~CPU_TASKS_FROZEN;

- switch (action) {
- case CPU_DOWN_PREPARE:
- flush_gcwq(gcwq);
- break;
- }
+ switch (action) {
+ case CPU_DOWN_PREPARE:
+ new_trustee = kthread_create(trustee_thread, gcwq,
+ "workqueue_trustee/%d\n", cpu);
+ if (IS_ERR(new_trustee))
+ return notifier_from_errno(PTR_ERR(new_trustee));
+ kthread_bind(new_trustee, cpu);
+ /* fall through */
+ case CPU_UP_PREPARE:
+ BUG_ON(gcwq->first_idle);
+ new_worker = create_worker(gcwq, false);
+ if (!new_worker) {
+ if (new_trustee)
+ kthread_stop(new_trustee);
+ return NOTIFY_BAD;
+ }
+ break;
+ case CPU_POST_DEAD:
+ case CPU_UP_CANCELED:
+ case CPU_DOWN_FAILED:
+ case CPU_ONLINE:
+ break;
+ case CPU_DYING:
+ /*
+ * We access this lockless. We are on the dying CPU
+ * and called from stomp machine.
+ *
+ * Before this, the trustee and all workers except for
+ * the ones which are still executing works from
+ * before the last CPU down must be on the cpu. After
+ * this, they'll all be diasporas.
+ */
+ gcwq->flags |= GCWQ_DISASSOCIATED;
+ default:
+ goto out;
+ }
+
+ /* some are called w/ irq disabled, don't disturb irq status */
+ spin_lock_irqsave(&gcwq->lock, flags);
+
+ switch (action) {
+ case CPU_DOWN_PREPARE:
+ /* initialize trustee and tell it to acquire the gcwq */
+ BUG_ON(gcwq->trustee || gcwq->trustee_state != TRUSTEE_DONE);
+ gcwq->trustee = new_trustee;
+ gcwq->trustee_state = TRUSTEE_START;
+ wake_up_process(gcwq->trustee);
+ wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE);
+ /* fall through */
+ case CPU_UP_PREPARE:
+ BUG_ON(gcwq->first_idle);
+ gcwq->first_idle = new_worker;
+ break;
+
+ case CPU_POST_DEAD:
+ gcwq->trustee_state = TRUSTEE_BUTCHER;
+ /* fall through */
+ case CPU_UP_CANCELED:
+ destroy_worker(gcwq->first_idle);
+ gcwq->first_idle = NULL;
+ break;

+ case CPU_DOWN_FAILED:
+ case CPU_ONLINE:
+ gcwq->flags &= ~GCWQ_DISASSOCIATED;
+ if (gcwq->trustee_state != TRUSTEE_DONE) {
+ gcwq->trustee_state = TRUSTEE_RELEASE;
+ wake_up_process(gcwq->trustee);
+ wait_trustee_state(gcwq, TRUSTEE_DONE);
+ }
+
+ /*
+ * Trustee is done and there might be no worker left.
+ * Put the first_idle in and request a real manager to
+ * take a look.
+ */
+ spin_unlock_irq(&gcwq->lock);
+ kthread_bind(gcwq->first_idle->task, cpu);
+ spin_lock_irq(&gcwq->lock);
+ gcwq->flags |= GCWQ_MANAGE_WORKERS;
+ start_worker(gcwq->first_idle);
+ gcwq->first_idle = NULL;
+ break;
+ }
+
+ spin_unlock_irqrestore(&gcwq->lock, flags);

+out:
return notifier_from_errno(0);
}

@@ -3528,8 +3777,7 @@ static int __init init_workqueues(void)
unsigned int cpu;
int i;

- cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_ACTIVE);
- hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_INACTIVE);
+ cpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE);

/* initialize gcwqs */
for_each_gcwq_cpu(cpu) {
@@ -3552,7 +3800,9 @@ static int __init init_workqueues(void)
(unsigned long)gcwq);

ida_init(&gcwq->worker_ida);
- init_waitqueue_head(&gcwq->idle_wait);
+
+ gcwq->trustee_state = TRUSTEE_DONE;
+ init_waitqueue_head(&gcwq->trustee_wait);
}

/* create the initial worker */
diff --git a/localversion-rt b/localversion-rt
index 2294034..a3b2408 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt36
+-rt37

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Ezequiel Garcia: "[PATCH for v3.5] cx25821: Remove bad strcpy to read-only char*"
Previous message: Borislav Petkov: "Re: [PATCH] x86, mm: Send tlb flush IPIs to online cpus only"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]