[PATCH] x86: Align TLB invalidation info

From: Nadav Amit
Date: Wed Jan 31 2018 - 15:11:39 EST


The TLB invalidation info is allocated on the stack, so it is not
guaranteed to be cache-line aligned and may span two cache lines. Since
this information may be transferred to different cores for TLB
shootdown, this might result in an additional cache line bouncing
between the cores.

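GCC provides a way to deal with this: __builtin_alloca_with_align().
Use it (on GCC >= 6.1; other compilers fall back to plain
__builtin_alloca(), which keeps the current unaligned behavior) to avoid
bouncing the extra cache line.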

Signed-off-by: Nadav Amit <namit@xxxxxxxxxx>

Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
---
arch/x86/mm/tlb.c | 21 +++++++++++----------
include/linux/compiler-gcc.h | 5 +++++
include/linux/compiler_types.h | 4 ++++
3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5bfe61a5e8e3..bab7bb5d982f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -574,37 +574,38 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long vmflag)
{
+ struct flush_tlb_info *info;
int cpu;

- struct flush_tlb_info info = {
- .mm = mm,
- };
+ info = __alloca_with_align(sizeof(*info),
+ SMP_CACHE_BYTES * BITS_PER_BYTE);
+ info->mm = mm;

cpu = get_cpu();

/* This is also a barrier that synchronizes with switch_mm(). */
- info.new_tlb_gen = inc_mm_tlb_gen(mm);
+ info->new_tlb_gen = inc_mm_tlb_gen(mm);

/* Should we flush just the requested range? */
if ((end != TLB_FLUSH_ALL) &&
!(vmflag & VM_HUGETLB) &&
((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
- info.start = start;
- info.end = end;
+ info->start = start;
+ info->end = end;
} else {
- info.start = 0UL;
- info.end = TLB_FLUSH_ALL;
+ info->start = 0UL;
+ info->end = TLB_FLUSH_ALL;
}

if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
VM_WARN_ON(irqs_disabled());
local_irq_disable();
- flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
+ flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN);
local_irq_enable();
}

if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
- flush_tlb_others(mm_cpumask(mm), &info);
+ flush_tlb_others(mm_cpumask(mm), info);

put_cpu();
}
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 631354acfa72..aea9a2e69417 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -314,6 +314,11 @@
#define __designated_init __attribute__((designated_init))
#endif

+#if GCC_VERSION >= 60100
+#define __alloca_with_align(size, alignment) \
+ __builtin_alloca_with_align(size, alignment)
+#endif
+
#endif /* gcc version >= 40000 specific checks */

#if !defined(__noclone)
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 6b79a9bba9a7..c71297d95c74 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -271,4 +271,8 @@ struct ftrace_likely_data {
# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
#endif

+#ifndef __alloca_with_align
+#define __alloca_with_align(size, alignment) __builtin_alloca(size)
+#endif
+
#endif /* __LINUX_COMPILER_TYPES_H */
--
2.14.1