[RFC PATCH 3/5] Replace per-subsystem mutexes with get_online_cpus()

From: Gautham R Shenoy
Date: Wed Oct 24 2007 - 01:34:43 EST


This patch converts the known per-subsystem mutexes to get_online_cpus
put_online_cpus. It also eliminates the CPU_LOCK_ACQUIRE
and CPU_LOCK_RELEASE hotplug notification events.

Signed-off-by: Gautham R Shenoy <ego@xxxxxxxxxx>
---
include/linux/notifier.h | 4 +---
kernel/cpu.c | 4 ----
kernel/sched.c | 27 ++++++++++-----------------
kernel/workqueue.c | 31 ++++++++++++++++---------------
mm/slab.c | 18 +++++++++++-------
5 files changed, 38 insertions(+), 46 deletions(-)

Index: linux-2.6.23/kernel/sched.c
===================================================================
--- linux-2.6.23.orig/kernel/sched.c
+++ linux-2.6.23/kernel/sched.c
@@ -360,7 +360,6 @@ struct rq {
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-static DEFINE_MUTEX(sched_hotcpu_mutex);

static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
{
@@ -4337,13 +4336,13 @@ long sched_setaffinity(pid_t pid, cpumas
struct task_struct *p;
int retval;

- mutex_lock(&sched_hotcpu_mutex);
+ get_online_cpus();
read_lock(&tasklist_lock);

p = find_process_by_pid(pid);
if (!p) {
read_unlock(&tasklist_lock);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
return -ESRCH;
}

@@ -4370,7 +4369,7 @@ long sched_setaffinity(pid_t pid, cpumas

out_unlock:
put_task_struct(p);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
return retval;
}

@@ -4427,7 +4426,7 @@ long sched_getaffinity(pid_t pid, cpumas
struct task_struct *p;
int retval;

- mutex_lock(&sched_hotcpu_mutex);
+ get_online_cpus();
read_lock(&tasklist_lock);

retval = -ESRCH;
@@ -4443,7 +4442,7 @@ long sched_getaffinity(pid_t pid, cpumas

out_unlock:
read_unlock(&tasklist_lock);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();

return retval;
}
@@ -5342,9 +5341,6 @@ migration_call(struct notifier_block *nf
struct rq *rq;

switch (action) {
- case CPU_LOCK_ACQUIRE:
- mutex_lock(&sched_hotcpu_mutex);
- break;

case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
@@ -5398,7 +5394,7 @@ migration_call(struct notifier_block *nf
BUG_ON(rq->nr_running != 0);

/* No need to migrate the tasks: it was best-effort if
- * they didn't take sched_hotcpu_mutex. Just wake up
+ * they didn't do a get_online_cpus(). Just wake up
* the requestors. */
spin_lock_irq(&rq->lock);
while (!list_empty(&rq->migration_queue)) {
@@ -5412,9 +5408,6 @@ migration_call(struct notifier_block *nf
spin_unlock_irq(&rq->lock);
break;
#endif
- case CPU_LOCK_RELEASE:
- mutex_unlock(&sched_hotcpu_mutex);
- break;
}
return NOTIFY_OK;
}
@@ -6338,10 +6331,10 @@ static int arch_reinit_sched_domains(voi
{
int err;

- mutex_lock(&sched_hotcpu_mutex);
+ get_online_cpus();
detach_destroy_domains(&cpu_online_map);
err = arch_init_sched_domains(&cpu_online_map);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();

return err;
}
@@ -6452,12 +6445,12 @@ void __init sched_init_smp(void)
{
cpumask_t non_isolated_cpus;

- mutex_lock(&sched_hotcpu_mutex);
+ get_online_cpus();
arch_init_sched_domains(&cpu_online_map);
cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
if (cpus_empty(non_isolated_cpus))
cpu_set(smp_processor_id(), non_isolated_cpus);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
/* XXX: Theoretical race here - CPU may be hotplugged now */
hotcpu_notifier(update_sched_domains, 0);

Index: linux-2.6.23/mm/slab.c
===================================================================
--- linux-2.6.23.orig/mm/slab.c
+++ linux-2.6.23/mm/slab.c
@@ -730,8 +730,7 @@ static inline void init_lock_keys(void)
#endif

/*
- * 1. Guard access to the cache-chain.
- * 2. Protect sanity of cpu_online_map against cpu hotplug events
+ * Guard access to the cache-chain.
*/
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;
@@ -1331,12 +1330,11 @@ static int __cpuinit cpuup_callback(stru
int err = 0;

switch (action) {
- case CPU_LOCK_ACQUIRE:
- mutex_lock(&cache_chain_mutex);
- break;
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
+ mutex_lock(&cache_chain_mutex);
err = cpuup_prepare(cpu);
+ mutex_unlock(&cache_chain_mutex);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
@@ -1373,9 +1371,8 @@ static int __cpuinit cpuup_callback(stru
#endif
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
+ mutex_lock(&cache_chain_mutex);
cpuup_canceled(cpu);
- break;
- case CPU_LOCK_RELEASE:
mutex_unlock(&cache_chain_mutex);
break;
}
@@ -2170,6 +2167,7 @@ kmem_cache_create (const char *name, siz
* We use cache_chain_mutex to ensure a consistent view of
* cpu_online_map as well. Please see cpuup_callback
*/
+ get_online_cpus();
mutex_lock(&cache_chain_mutex);

list_for_each_entry(pc, &cache_chain, next) {
@@ -2396,6 +2394,7 @@ oops:
panic("kmem_cache_create(): failed to create slab `%s'\n",
name);
mutex_unlock(&cache_chain_mutex);
+ put_online_cpus();
return cachep;
}
EXPORT_SYMBOL(kmem_cache_create);
@@ -2547,9 +2546,11 @@ int kmem_cache_shrink(struct kmem_cache
int ret;
BUG_ON(!cachep || in_interrupt());

+ get_online_cpus();
mutex_lock(&cache_chain_mutex);
ret = __cache_shrink(cachep);
mutex_unlock(&cache_chain_mutex);
+ put_online_cpus();
return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);
@@ -2575,6 +2576,7 @@ void kmem_cache_destroy(struct kmem_cach
BUG_ON(!cachep || in_interrupt());

/* Find the cache in the chain of caches. */
+ get_online_cpus();
mutex_lock(&cache_chain_mutex);
/*
* the chain is never empty, cache_cache is never destroyed
@@ -2584,6 +2586,7 @@ void kmem_cache_destroy(struct kmem_cach
slab_error(cachep, "Can't free all objects");
list_add(&cachep->next, &cache_chain);
mutex_unlock(&cache_chain_mutex);
+ put_online_cpus();
return;
}

@@ -2592,6 +2595,7 @@ void kmem_cache_destroy(struct kmem_cach

__kmem_cache_destroy(cachep);
mutex_unlock(&cache_chain_mutex);
+ put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);

Index: linux-2.6.23/kernel/workqueue.c
===================================================================
--- linux-2.6.23.orig/kernel/workqueue.c
+++ linux-2.6.23/kernel/workqueue.c
@@ -592,8 +592,6 @@ EXPORT_SYMBOL(schedule_delayed_work_on);
* Returns zero on success.
* Returns -ve errno on failure.
*
- * Appears to be racy against CPU hotplug.
- *
* schedule_on_each_cpu() is very slow.
*/
int schedule_on_each_cpu(work_func_t func)
@@ -605,7 +603,7 @@ int schedule_on_each_cpu(work_func_t fun
if (!works)
return -ENOMEM;

- preempt_disable(); /* CPU hotplug */
+ get_online_cpus(); /* CPU hotplug */
for_each_online_cpu(cpu) {
struct work_struct *work = per_cpu_ptr(works, cpu);

@@ -613,8 +611,8 @@ int schedule_on_each_cpu(work_func_t fun
set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
}
- preempt_enable();
flush_workqueue(keventd_wq);
+ put_online_cpus();
free_percpu(works);
return 0;
}
@@ -749,8 +747,10 @@ struct workqueue_struct *__create_workqu
err = create_workqueue_thread(cwq, singlethread_cpu);
start_workqueue_thread(cwq, -1);
} else {
+ get_online_cpus();
mutex_lock(&workqueue_mutex);
list_add(&wq->list, &workqueues);
+ mutex_unlock(&workqueue_mutex);

for_each_possible_cpu(cpu) {
cwq = init_cpu_workqueue(wq, cpu);
@@ -759,7 +759,7 @@ struct workqueue_struct *__create_workqu
err = create_workqueue_thread(cwq, cpu);
start_workqueue_thread(cwq, cpu);
}
- mutex_unlock(&workqueue_mutex);
+ put_online_cpus();
}

if (err) {
@@ -809,9 +809,11 @@ void destroy_workqueue(struct workqueue_
struct cpu_workqueue_struct *cwq;
int cpu;

+ get_online_cpus();
mutex_lock(&workqueue_mutex);
list_del(&wq->list);
mutex_unlock(&workqueue_mutex);
+ put_online_cpus();

for_each_cpu_mask(cpu, *cpu_map) {
cwq = per_cpu_ptr(wq->cpu_wq, cpu);
@@ -830,22 +832,17 @@ static int __devinit workqueue_cpu_callb
unsigned int cpu = (unsigned long)hcpu;
struct cpu_workqueue_struct *cwq;
struct workqueue_struct *wq;
+ int ret = NOTIFY_OK;

action &= ~CPU_TASKS_FROZEN;

switch (action) {
- case CPU_LOCK_ACQUIRE:
- mutex_lock(&workqueue_mutex);
- return NOTIFY_OK;
-
- case CPU_LOCK_RELEASE:
- mutex_unlock(&workqueue_mutex);
- return NOTIFY_OK;

case CPU_UP_PREPARE:
cpu_set(cpu, cpu_populated_map);
}

+ mutex_lock(&workqueue_mutex);
list_for_each_entry(wq, &workqueues, list) {
cwq = per_cpu_ptr(wq->cpu_wq, cpu);

@@ -853,8 +850,10 @@ static int __devinit workqueue_cpu_callb
case CPU_UP_PREPARE:
if (!create_workqueue_thread(cwq, cpu))
break;
- printk(KERN_ERR "workqueue for %i failed\n", cpu);
- return NOTIFY_BAD;
+ printk(KERN_ERR "workqueue [%s] for %i failed\n",
+ wq->name, cpu);
+ ret = NOTIFY_BAD;
+ goto out_unlock;

case CPU_ONLINE:
start_workqueue_thread(cwq, cpu);
@@ -868,7 +867,9 @@ static int __devinit workqueue_cpu_callb
}
}

- return NOTIFY_OK;
+out_unlock:
+ mutex_unlock(&workqueue_mutex);
+ return ret;
}

void __init init_workqueues(void)
Index: linux-2.6.23/kernel/cpu.c
===================================================================
--- linux-2.6.23.orig/kernel/cpu.c
+++ linux-2.6.23/kernel/cpu.c
@@ -218,7 +218,6 @@ static int _cpu_down(unsigned int cpu, i
return -EINVAL;

cpu_hotplug_begin();
- raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
hcpu, -1, &nr_calls);
if (err == NOTIFY_BAD) {
@@ -271,7 +270,6 @@ out_thread:
out_allowed:
set_cpus_allowed(current, old_allowed);
out_release:
- raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
cpu_hotplug_done();
return err;
}
@@ -302,7 +300,6 @@ static int __cpuinit _cpu_up(unsigned in
return -EINVAL;

cpu_hotplug_begin();
- raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
-1, &nr_calls);
if (ret == NOTIFY_BAD) {
@@ -326,7 +323,6 @@ out_notify:
if (ret != 0)
__raw_notifier_call_chain(&cpu_chain,
CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
- raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
cpu_hotplug_done();

return ret;
Index: linux-2.6.23/include/linux/notifier.h
===================================================================
--- linux-2.6.23.orig/include/linux/notifier.h
+++ linux-2.6.23/include/linux/notifier.h
@@ -207,9 +207,7 @@ static inline int notifier_to_errno(int
#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */
#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
-#define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */
-#define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */
-#define CPU_DYING 0x000A /* CPU (unsigned)v not running any task,
+#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task,
* not handling interrupts, soon dead */

/* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
--
Gautham R Shenoy
Linux Technology Center
IBM India.
"Freedom comes with a price tag of responsibility, which is still a bargain,
because Freedom is priceless!"
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/