[PATCH 1/7] x86-64: introduce fast variant of smp_call_function_single()
From: Jens Axboe
Date: Wed Mar 12 2008 - 07:58:08 EST
From: Nick Piggin <npiggin@xxxxxxx>
Signed-off-by: Jens Axboe <jens.axboe@xxxxxxxxxx>
---
arch/x86/kernel/entry_64.S | 3 +
arch/x86/kernel/i8259_64.c | 1 +
arch/x86/kernel/smp_64.c | 303 +++++++++++++++++++++--------
include/asm-x86/hw_irq_64.h | 4 +-
include/asm-x86/mach-default/entry_arch.h | 1 +
include/linux/smp.h | 2 +-
6 files changed, 232 insertions(+), 82 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c20c9e7..22caf56 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -713,6 +713,9 @@ END(invalidate_interrupt\num)
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
+ENTRY(call_function_single_interrupt)
+ apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
+END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
index fa57a15..2b0b6d2 100644
--- a/arch/x86/kernel/i8259_64.c
+++ b/arch/x86/kernel/i8259_64.c
@@ -493,6 +493,7 @@ void __init native_init_IRQ(void)
/* IPI for generic function call */
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+ set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
/* Low priority IPI to cleanup after moving an irq */
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index 2fd74b0..1196a12 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -18,6 +18,7 @@
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/interrupt.h>
+#include <linux/rcupdate.h>
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
@@ -295,21 +296,29 @@ void smp_send_reschedule(int cpu)
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}
+#define CALL_WAIT 0x01
+#define CALL_FALLBACK 0x02
/*
* Structure and data for smp_call_function(). This is designed to minimise
* static memory requirements. It also looks cleaner.
*/
static DEFINE_SPINLOCK(call_lock);
-struct call_data_struct {
+struct call_data {
+ spinlock_t lock;
+ struct list_head list;
void (*func) (void *info);
void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
+ unsigned int flags;
+ unsigned int refs;
+ cpumask_t cpumask;
+ struct rcu_head rcu_head;
};
-static struct call_data_struct * call_data;
+static LIST_HEAD(call_queue);
+
+static unsigned long call_fallback_used;
+static struct call_data call_data_fallback;
void lock_ipi_call_lock(void)
{
@@ -321,55 +330,47 @@ void unlock_ipi_call_lock(void)
spin_unlock_irq(&call_lock);
}
-/*
- * this function sends a 'generic call function' IPI to all other CPU
- * of the system defined in the mask.
- */
-static int __smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
-{
- struct call_data_struct data;
- cpumask_t allbutself;
- int cpus;
- allbutself = cpu_online_map;
- cpu_clear(smp_processor_id(), allbutself);
-
- cpus_and(mask, mask, allbutself);
- cpus = cpus_weight(mask);
-
- if (!cpus)
- return 0;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
+struct call_single_data {
+ struct list_head list;
+ void (*func) (void *info);
+ void *info;
+ unsigned int flags;
+};
- call_data = &data;
- wmb();
+struct call_single_queue {
+ spinlock_t lock;
+ struct list_head list;
+};
+static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
- /* Send a message to other CPUs */
- if (cpus_equal(mask, allbutself))
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
- else
- send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+static unsigned long call_single_fallback_used;
+static struct call_single_data call_single_data_fallback;
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
+int __cpuinit init_smp_call(void)
+{
+ int i;
- if (!wait)
- return 0;
+ for_each_cpu_mask(i, cpu_possible_map) {
+ spin_lock_init(&per_cpu(call_single_queue, i).lock);
+ INIT_LIST_HEAD(&per_cpu(call_single_queue, i).list);
+ }
+ return 0;
+}
+core_initcall(init_smp_call);
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
+static void rcu_free_call_data(struct rcu_head *head)
+{
+ struct call_data *data;
+ data = container_of(head, struct call_data, rcu_head);
+ kfree(data);
+}
- return 0;
+static void free_call_data(struct call_data *data)
+{
+ call_rcu(&data->rcu_head, rcu_free_call_data);
}
+
/**
* smp_call_function_mask(): Run a function on a set of other CPUs.
* @mask: The set of cpus to run on. Must not include the current cpu.
@@ -389,15 +390,69 @@ int smp_call_function_mask(cpumask_t mask,
void (*func)(void *), void *info,
int wait)
{
- int ret;
+ struct call_data *data;
+ cpumask_t allbutself;
+ unsigned int flags;
+ int cpus;
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
+ WARN_ON(preemptible());
+
+ allbutself = cpu_online_map;
+ cpu_clear(smp_processor_id(), allbutself);
+
+ cpus_and(mask, mask, allbutself);
+ cpus = cpus_weight(mask);
+
+ if (!cpus)
+ return 0;
- spin_lock(&call_lock);
- ret = __smp_call_function_mask(mask, func, info, wait);
+ flags = wait ? CALL_WAIT : 0;
+ data = kmalloc(sizeof(struct call_data), GFP_ATOMIC);
+ if (unlikely(!data)) {
+ while (test_and_set_bit_lock(0, &call_fallback_used))
+ cpu_relax();
+ data = &call_data_fallback;
+ flags |= CALL_FALLBACK;
+ /* XXX: can IPI all to "synchronize" RCU? */
+ }
+
+ spin_lock_init(&data->lock);
+ data->func = func;
+ data->info = info;
+ data->flags = flags;
+ data->refs = cpus;
+ data->cpumask = mask;
+
+ local_irq_disable();
+ while (!spin_trylock(&call_lock)) {
+ local_irq_enable();
+ cpu_relax();
+ local_irq_disable();
+ }
+ /* could do ipi = list_empty(&dst->list) || !cpumask_ipi_pending() */
+ list_add_tail_rcu(&data->list, &call_queue);
spin_unlock(&call_lock);
- return ret;
+ local_irq_enable();
+
+ /* Send a message to other CPUs */
+ if (cpus_equal(mask, allbutself))
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+ else
+ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+
+ if (wait) {
+ /* Wait for response */
+ while (data->flags)
+ cpu_relax();
+ if (likely(!(flags & CALL_FALLBACK)))
+ free_call_data(data);
+ else
+ clear_bit_unlock(0, &call_fallback_used);
+ }
+
+ return 0;
}
EXPORT_SYMBOL(smp_call_function_mask);
@@ -414,11 +469,11 @@ EXPORT_SYMBOL(smp_call_function_mask);
* or is or has executed.
*/
-int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
int nonatomic, int wait)
{
/* prevent preemption and reschedule on another processor */
- int ret, me = get_cpu();
+ int me = get_cpu();
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
@@ -427,14 +482,54 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
local_irq_disable();
func(info);
local_irq_enable();
- put_cpu();
- return 0;
- }
+ } else {
+ struct call_single_data d;
+ struct call_single_data *data;
+ struct call_single_queue *dst;
+ cpumask_t mask = cpumask_of_cpu(cpu);
+ unsigned int flags = wait ? CALL_WAIT : 0;
+ int ipi;
+
+ if (!wait) {
+ data = kmalloc(sizeof(struct call_single_data), GFP_ATOMIC);
+ if (unlikely(!data)) {
+ while (test_and_set_bit_lock(0, &call_single_fallback_used))
+ cpu_relax();
+ data = &call_single_data_fallback;
+ flags |= CALL_FALLBACK;
+ }
+ } else {
+ data = &d;
+ }
+
+ data->func = func;
+ data->info = info;
+ data->flags = flags;
+ dst = &per_cpu(call_single_queue, cpu);
+
+ local_irq_disable();
+ while (!spin_trylock(&dst->lock)) {
+ local_irq_enable();
+ cpu_relax();
+ local_irq_disable();
+ }
+ ipi = list_empty(&dst->list);
+ list_add_tail(&data->list, &dst->list);
+ spin_unlock(&dst->lock);
+ local_irq_enable();
- ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
+ if (ipi)
+ send_IPI_mask(mask, CALL_FUNCTION_SINGLE_VECTOR);
+
+ if (wait) {
+ /* Wait for response */
+ while (data->flags)
+ cpu_relax();
+ }
+ }
put_cpu();
- return ret;
+ return 0;
}
EXPORT_SYMBOL(smp_call_function_single);
@@ -474,18 +569,13 @@ static void stop_this_cpu(void *dummy)
void smp_send_stop(void)
{
- int nolock;
unsigned long flags;
if (reboot_force)
return;
- /* Don't deadlock on the call lock in panic */
- nolock = !spin_trylock(&call_lock);
local_irq_save(flags);
- __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
- if (!nolock)
- spin_unlock(&call_lock);
+ smp_call_function(stop_this_cpu, NULL, 0, 0);
disable_local_APIC();
local_irq_restore(flags);
}
@@ -503,28 +593,83 @@ asmlinkage void smp_reschedule_interrupt(void)
asmlinkage void smp_call_function_interrupt(void)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
+ struct list_head *pos, *tmp;
+ int cpu = smp_processor_id();
ack_APIC_irq();
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
exit_idle();
irq_enter();
- (*func)(info);
+
+ list_for_each_safe_rcu(pos, tmp, &call_queue) {
+ struct call_data *data;
+ int refs;
+
+ data = list_entry(pos, struct call_data, list);
+ if (!cpu_isset(cpu, data->cpumask))
+ continue;
+
+ data->func(data->info);
+ spin_lock(&data->lock);
+ WARN_ON(!cpu_isset(cpu, data->cpumask));
+ cpu_clear(cpu, data->cpumask);
+ WARN_ON(data->refs == 0);
+ data->refs--;
+ refs = data->refs;
+ spin_unlock(&data->lock);
+
+ if (refs == 0) {
+ WARN_ON(cpus_weight(data->cpumask));
+ spin_lock(&call_lock);
+ list_del_rcu(&data->list);
+ spin_unlock(&call_lock);
+ if (data->flags & CALL_WAIT) {
+ smp_wmb();
+ data->flags = 0;
+ } else {
+ if (likely(!(data->flags & CALL_FALLBACK)))
+ free_call_data(data);
+ else
+ clear_bit_unlock(0, &call_fallback_used);
+ }
+ }
+ }
+
add_pda(irq_call_count, 1);
irq_exit();
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
+}
+
+asmlinkage void smp_call_function_single_interrupt(void)
+{
+ struct call_single_queue *q;
+ LIST_HEAD(list);
+
+ ack_APIC_irq();
+ exit_idle();
+ irq_enter();
+
+ q = &__get_cpu_var(call_single_queue);
+ spin_lock(&q->lock);
+ list_replace_init(&q->list, &list);
+ spin_unlock(&q->lock);
+
+ while (!list_empty(&list)) {
+ struct call_single_data *data;
+
+ data = list_entry(list.next, struct call_single_data, list);
+ list_del(&data->list);
+
+ data->func(data->info);
+ if (data->flags & CALL_WAIT) {
+ smp_wmb();
+ data->flags = 0;
+ } else {
+ if (likely(!(data->flags & CALL_FALLBACK)))
+ kfree(data);
+ else
+ clear_bit_unlock(0, &call_single_fallback_used);
+ }
}
+ add_pda(irq_call_count, 1);
+ irq_exit();
}
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
index 312a58d..06ac80c 100644
--- a/include/asm-x86/hw_irq_64.h
+++ b/include/asm-x86/hw_irq_64.h
@@ -68,8 +68,7 @@
#define ERROR_APIC_VECTOR 0xfe
#define RESCHEDULE_VECTOR 0xfd
#define CALL_FUNCTION_VECTOR 0xfc
-/* fb free - please don't readd KDB here because it's useless
- (hint - think what a NMI bit does to a vector) */
+#define CALL_FUNCTION_SINGLE_VECTOR 0xfb
#define THERMAL_APIC_VECTOR 0xfa
#define THRESHOLD_APIC_VECTOR 0xf9
/* f8 free */
@@ -102,6 +101,7 @@ void spurious_interrupt(void);
void error_interrupt(void);
void reschedule_interrupt(void);
void call_function_interrupt(void);
+void call_function_single_interrupt(void);
void irq_move_cleanup_interrupt(void);
void invalidate_interrupt0(void);
void invalidate_interrupt1(void);
diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
index bc86146..9283b60 100644
--- a/include/asm-x86/mach-default/entry_arch.h
+++ b/include/asm-x86/mach-default/entry_arch.h
@@ -13,6 +13,7 @@
BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
#endif
/*
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 55232cc..c938d26 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -53,7 +53,6 @@ extern void smp_cpus_done(unsigned int max_cpus);
* Call a function on all other processors
*/
int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
-
int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
int retry, int wait);
@@ -92,6 +91,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info)
}
#define smp_call_function(func, info, retry, wait) \
(up_smp_call_function(func, info))
+
#define on_each_cpu(func,info,retry,wait) \
({ \
local_irq_disable(); \
--
1.5.4.GIT
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/