[PATCH 1/4] generic-smp: remove single ipi fallback for smp_call_function_many()

From: Peter Zijlstra
Date: Mon Feb 16 2009 - 11:42:55 EST


In preparation for removing the kmalloc() calls from the generic-ipi code,
get rid of the single ipi fallback for smp_call_function_many().

Because we cannot get around carrying the cpumask in the data -- imagine
two such calls with different but overlapping masks -- embed a full cpumask
in each queue entry.
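
For illustration (hypothetical CPU numbers and masks; the lookup is the
same loop generic_smp_call_function_interrupt() runs in the diff below):

	/* caller on CPU 0: smp_call_function_many({1,2}, f, ...) */
	/* caller on CPU 4: smp_call_function_many({2,3}, g, ...) */

	/*
	 * CPU 2 finds both entries on the shared queue and must run both
	 * f and g, while CPU 1 runs only f and CPU 3 only g. The handler
	 * can only tell the entries apart if each carries its own mask:
	 */
	list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
		if (!cpumask_test_cpu(cpu, &data->cpumask))
			continue;	/* entry not aimed at this cpu */
		data->csd.func(data->csd.info);
	}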

Also, since we cannot simply remove an item from the global queue (another
cpu might still be referencing it), some form of quiescence needs to pass
before an item can be freed or reused. The current code uses regular RCU
for that purpose.

However, since we'll be wanting to quickly reuse an item, we need something
with a much faster turn-around. We get that by watching for the global queue
itself to go idle. Since there is only a limited number of elements, waiting
for the queue to drain will force such a quiescent state.
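
In code, the scheme reduces to the following (condensed from the diff
below; the RCU/kfree path and the wait logic are omitted):

	/* enqueue side -- smp_call_function_many() */
	spin_lock_irqsave(&call_function.lock, flags);
	call_function.counter++;		/* entries in flight */
	list_add_tail_rcu(&data->csd.list, &call_function.queue);
	spin_unlock_irqrestore(&call_function.lock, flags);

	/* dequeue side -- the IPI handler, once data->refs drops to 0 */
	spin_lock(&call_function.lock);
	if (data->csd.flags & CSD_FLAG_LOCK)
		list_add(&data->free_list, &call_function.free_list);
	list_del_rcu(&data->csd.list);
	if (!--call_function.counter)
		/*
		 * The queue went idle: no cpu can still be referencing
		 * any entry, so everything parked on free_list can be
		 * released for reuse.
		 */
		list_splice_init(&call_function.free_list, &free_list);
	spin_unlock(&call_function.lock);

	/* reuse side -- kmalloc() failed, fall back to the per-cpu
	 * entry and spin until its previous use has been released */
	data = &per_cpu(cfd_data, me);
	while (data->csd.flags & CSD_FLAG_LOCK)
		cpu_relax();
	data->csd.flags = CSD_FLAG_LOCK;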

Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
---
kernel/smp.c | 112 +++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 75 insertions(+), 37 deletions(-)

Index: linux-2.6/kernel/smp.c
===================================================================
--- linux-2.6.orig/kernel/smp.c
+++ linux-2.6/kernel/smp.c
@@ -12,8 +12,23 @@
#include <linux/smp.h>

static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
-static LIST_HEAD(call_function_queue);
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
+
+static struct {
+ struct list_head queue;
+ spinlock_t lock;
+ /*
+ * mini-RCU state machine
+ *
+ * @counter counts the number of entries on the queue, and @free_list
+ * contains the items to free when the queue is idle.
+ */
+ unsigned int counter;
+ struct list_head free_list;
+} call_function __cacheline_aligned_in_smp = {
+ .queue = LIST_HEAD_INIT(call_function.queue),
+ .lock = __SPIN_LOCK_UNLOCKED(call_function.lock),
+ .free_list = LIST_HEAD_INIT(call_function.free_list),
+};

enum {
CSD_FLAG_WAIT = 0x01,
CSD_FLAG_ALLOC = 0x02,
+ CSD_FLAG_LOCK = 0x04,
@@ -25,8 +40,11 @@ struct call_function_data {
struct call_single_data csd;
spinlock_t lock;
unsigned int refs;
- struct rcu_head rcu_head;
- unsigned long cpumask_bits[];
+ union {
+ struct rcu_head rcu_head;
+ struct list_head free_list;
+ };
+ struct cpumask cpumask;
};

struct call_single_queue {
@@ -107,17 +125,17 @@ void generic_smp_call_function_interrupt
* It's ok to use list_for_each_rcu() here even though we may delete
* 'pos', since list_del_rcu() doesn't clear ->next
*/
- rcu_read_lock();
- list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
+ list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
+ LIST_HEAD(free_list);
int refs;

- if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits)))
+ if (!cpumask_test_cpu(cpu, &data->cpumask))
continue;

data->csd.func(data->csd.info);

spin_lock(&data->lock);
- cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
+ cpumask_clear_cpu(cpu, &data->cpumask);
WARN_ON(data->refs == 0);
data->refs--;
refs = data->refs;
@@ -126,9 +144,12 @@ void generic_smp_call_function_interrupt
if (refs)
continue;

- spin_lock(&call_function_lock);
+ spin_lock(&call_function.lock);
+ if (data->csd.flags & CSD_FLAG_LOCK)
+ list_add(&data->free_list, &call_function.free_list);
list_del_rcu(&data->csd.list);
- spin_unlock(&call_function_lock);
+ if (!--call_function.counter)
+ list_splice_init(&call_function.free_list, &free_list);
+ spin_unlock(&call_function.lock);

if (data->csd.flags & CSD_FLAG_WAIT) {
/*
@@ -140,8 +161,22 @@ void generic_smp_call_function_interrupt
}
if (data->csd.flags & CSD_FLAG_ALLOC)
call_rcu(&data->rcu_head, rcu_free_call_data);
+
+ while (!list_empty(&free_list)) {
+ struct call_function_data *free;
+
+ free = list_first_entry(&free_list,
+ struct call_function_data, free_list);
+
+ list_del(&free->free_list);
+ /*
+ * Serialize the preceding stores to the entry with
+ * the flags clear that releases it for reuse.
+ */
+ smp_wmb();
+ free->csd.flags &= ~CSD_FLAG_LOCK;
+ }
}
- rcu_read_unlock();

put_cpu();
}
@@ -302,6 +337,8 @@ void __smp_call_function_single(int cpu,
arch_send_call_function_ipi(*(maskp))
#endif

+static DEFINE_PER_CPU(struct call_function_data, cfd_data);
+
/**
* smp_call_function_many(): Run a function on a set of other CPUs.
* @mask: The set of cpus to run on (only runs on online subset).
@@ -323,14 +360,14 @@ void smp_call_function_many(const struct
{
struct call_function_data *data;
unsigned long flags;
- int cpu, next_cpu;
+ int cpu, next_cpu, me = smp_processor_id();

/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());

/* So, what's a CPU they want? Ignoring this one. */
cpu = cpumask_first_and(mask, cpu_online_mask);
- if (cpu == smp_processor_id())
+ if (cpu == me)
cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
/* No online cpus? We're done. */
if (cpu >= nr_cpu_ids)
@@ -338,7 +375,7 @@ void smp_call_function_many(const struct

/* Do we have another CPU which isn't us? */
next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
- if (next_cpu == smp_processor_id())
+ if (next_cpu == me)
next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);

/* Fastpath: do that cpu by itself. */
@@ -347,31 +384,32 @@ void smp_call_function_many(const struct
return;
}

- data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
- if (unlikely(!data)) {
- /* Slow path. */
- for_each_online_cpu(cpu) {
- if (cpu == smp_processor_id())
- continue;
- if (cpumask_test_cpu(cpu, mask))
- smp_call_function_single(cpu, func, info, wait);
- }
- return;
+ data = kmalloc(sizeof(*data), GFP_ATOMIC);
+ if (data)
+ data->csd.flags = CSD_FLAG_ALLOC;
+ else {
+ data = &per_cpu(cfd_data, me);
+ /*
+ * We need to wait for all previous users to go away.
+ */
+ while (data->csd.flags & CSD_FLAG_LOCK)
+ cpu_relax();
+ data->csd.flags = CSD_FLAG_LOCK;
}

spin_lock_init(&data->lock);
- data->csd.flags = CSD_FLAG_ALLOC;
if (wait)
data->csd.flags |= CSD_FLAG_WAIT;
data->csd.func = func;
data->csd.info = info;
- cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits));
- data->refs = cpumask_weight(to_cpumask(data->cpumask_bits));
-
- spin_lock_irqsave(&call_function_lock, flags);
- list_add_tail_rcu(&data->csd.list, &call_function_queue);
- spin_unlock_irqrestore(&call_function_lock, flags);
+ cpumask_and(&data->cpumask, mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &data->cpumask);
+ data->refs = cpumask_weight(&data->cpumask);
+
+ spin_lock_irqsave(&call_function.lock, flags);
+ call_function.counter++;
+ list_add_tail_rcu(&data->csd.list, &call_function.queue);
+ spin_unlock_irqrestore(&call_function.lock, flags);

/*
* Make the list addition visible before sending the ipi.
@@ -379,7 +417,7 @@ void smp_call_function_many(const struct
smp_mb();

/* Send a message to all CPUs in the map */
- arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits));
+ arch_send_call_function_ipi_mask(&data->cpumask);

/* optionally wait for the CPUs to complete */
if (wait)
@@ -413,20 +451,20 @@ EXPORT_SYMBOL(smp_call_function);

void ipi_call_lock(void)
{
- spin_lock(&call_function_lock);
+ spin_lock(&call_function.lock);
}

void ipi_call_unlock(void)
{
- spin_unlock(&call_function_lock);
+ spin_unlock(&call_function.lock);
}

void ipi_call_lock_irq(void)
{
- spin_lock_irq(&call_function_lock);
+ spin_lock_irq(&call_function.lock);
}

void ipi_call_unlock_irq(void)
{
- spin_unlock_irq(&call_function_lock);
+ spin_unlock_irq(&call_function.lock);
}

--
