[PATCH 3/3] x86: collect hot percpu variables into one cacheline

From: Tejun Heo
Date: Mon Aug 03 2009 - 01:12:49 EST


On x86_64, percpu variables current_task and kernel_stack are used for
get_current() and current_thread_info() respectively and thus are
often used close to each other. Move definition of current_task to
kernel/cpu/common.c right above kernel_stack definition and align it
to cacheline so that they always fall into the same cacheline. Two
percpu variables defined there together - irq_stack_ptr and irq_count
- are also pretty hot and will benefit from sharing the cacheline.

For consistency, current_task definition for x86_32 is also moved to
kernel/cpu/common.c.

Putting current_task and kernel_stack into the same cacheline was
suggested by Linus Tolvards.

Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
Cc: Linus Tolvards <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxx>
---
For now this should do but adding DEFINE_PER_CPU_HOT() definitely is a
viable option.

arch/x86/kernel/cpu/common.c | 15 +++++++++++++--
arch/x86/kernel/process_32.c | 3 ---
arch/x86/kernel/process_64.c | 3 ---
3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 12493c5..1bd88ed 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -987,13 +987,21 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE);

-DEFINE_PER_CPU(char *, irq_stack_ptr) =
- init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+/*
+ * The following four percpu variables are hot. Align current_task to
+ * cacheline size such that all four fall in the same cacheline.
+ */
+DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
+ &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);

DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);

+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+
DEFINE_PER_CPU(unsigned int, irq_count) = -1;

/*
@@ -1041,6 +1049,9 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist);

#else /* CONFIG_X86_64 */

+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, stack_canary);
#endif
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 59f4524..daa4107 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -61,9 +61,6 @@

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");

-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
/*
* Return saved PC of a blocked thread.
*/
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ebefb54..c4c675d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -55,9 +55,6 @@

asmlinkage extern void ret_from_fork(void);

-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);

--
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/