[RFC PATCH] x86/hweight: Get rid of the special calling convention
From: Borislav Petkov
Date: Wed May 04 2016 - 14:46:22 EST
On Thu, Apr 07, 2016 at 11:43:33AM +0200, Borislav Petkov wrote:
> I guess we can do something like this:
>
> if (likely(static_cpu_has(X86_FEATURE_POPCNT)))
> asm volatile(POPCNT32
> : "="REG_OUT (res)
> : REG_IN (w));
> else
> res = __sw_hweight32(w);
>
> and get rid of the custom calling convention.
>
> Along with some numbers showing that the change doesn't cause any
> noticeable slowdown...
Ok, here's something which seems to build and boot in kvm.
I like how we don't need the special calling conventions anymore and we
can actually say "popcnt .." and gcc selects registers.
The include files hackery is kinda nasty but I had to do it because I
needed to be able to use static_cpu_has() in a header and including
asm/cpufeature.h pulls in all kinds of nasty dependencies. I'm certainly
open for better ideas...
---
From: Borislav Petkov <bp@xxxxxxx>
Date: Wed, 4 May 2016 18:52:09 +0200
Subject: [PATCH] x86/hweight: Get rid of the special calling convention
People complained about ARCH_HWEIGHT_CFLAGS and how it throws a wrench
into kcov, lto, etc, experimentation.
And its not like we absolutely need it so let's get rid of it and
streamline it a bit. I had to do some carving out of facilities so
that the include hell doesn't swallow me but other than that, the new
__arch_hweight*() versions look much more palatable and gcc is more free
to select registers than us hardcoding them in the insn bytes.
Signed-off-by: Borislav Petkov <bp@xxxxxxx>
---
arch/x86/Kconfig | 5 --
arch/x86/include/asm/arch_hweight.h | 43 ++++---------
arch/x86/include/asm/cpufeature.h | 112 +-------------------------------
arch/x86/include/asm/cpuinfo.h | 65 +++++++++++++++++++
arch/x86/include/asm/processor.h | 63 +-----------------
arch/x86/include/asm/static_cpu_has.h | 116 ++++++++++++++++++++++++++++++++++
lib/Makefile | 5 --
7 files changed, 197 insertions(+), 212 deletions(-)
create mode 100644 arch/x86/include/asm/cpuinfo.h
create mode 100644 arch/x86/include/asm/static_cpu_has.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7bb15747fea2..79e0bcd61cb1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -292,11 +292,6 @@ config X86_32_LAZY_GS
def_bool y
depends on X86_32 && !CC_STACKPROTECTOR
-config ARCH_HWEIGHT_CFLAGS
- string
- default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
- default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
-
config ARCH_SUPPORTS_UPROBES
def_bool y
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 02e799fa43d1..6c1a2d500c4c 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -2,36 +2,18 @@
#define _ASM_X86_HWEIGHT_H
#include <asm/cpufeatures.h>
+#include <asm/static_cpu_has.h>
-#ifdef CONFIG_64BIT
-/* popcnt %edi, %eax -- redundant REX prefix for alignment */
-#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
-/* popcnt %rdi, %rax */
-#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
-#define REG_IN "D"
-#define REG_OUT "a"
-#else
-/* popcnt %eax, %eax */
-#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0"
-#define REG_IN "a"
-#define REG_OUT "a"
-#endif
-
-/*
- * __sw_hweightXX are called from within the alternatives below
- * and callee-clobbered registers need to be taken care of. See
- * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
- * compiler switches.
- */
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
- unsigned int res = 0;
+ unsigned int res;
- asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ if (likely(static_cpu_has(X86_FEATURE_POPCNT))) {
+ asm volatile("popcnt %[w], %[res]" : [res] "=r" (res) : [w] "r" (w));
- return res;
+ return res;
+ }
+ return __sw_hweight32(w);
}
static inline unsigned int __arch_hweight16(unsigned int w)
@@ -53,13 +35,14 @@ static inline unsigned long __arch_hweight64(__u64 w)
#else
static __always_inline unsigned long __arch_hweight64(__u64 w)
{
- unsigned long res = 0;
+ unsigned long res;
- asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ if (likely(static_cpu_has(X86_FEATURE_POPCNT))) {
+ asm volatile("popcnt %[w], %[res]" : [res] "=r" (res) : [w] "r" (w));
- return res;
+ return res;
+ }
+ return __sw_hweight64(w);
}
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 07c942d84662..9a70b12ae8df 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -6,6 +6,8 @@
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
#include <asm/asm.h>
+#include <asm/static_cpu_has.h>
+
#include <linux/bitops.h>
enum cpuid_leafs
@@ -45,51 +47,6 @@ extern const char * const x86_power_flags[32];
*/
extern const char * const x86_bug_flags[NBUGINTS*32];
-#define test_cpu_cap(c, bit) \
- test_bit(bit, (unsigned long *)((c)->x86_capability))
-
-#define REQUIRED_MASK_BIT_SET(bit) \
- ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0 )) || \
- (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1 )) || \
- (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2 )) || \
- (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3 )) || \
- (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4 )) || \
- (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5 )) || \
- (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6 )) || \
- (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7 )) || \
- (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8 )) || \
- (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9 )) || \
- (((bit)>>5)==10 && (1UL<<((bit)&31) & REQUIRED_MASK10)) || \
- (((bit)>>5)==11 && (1UL<<((bit)&31) & REQUIRED_MASK11)) || \
- (((bit)>>5)==12 && (1UL<<((bit)&31) & REQUIRED_MASK12)) || \
- (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK13)) || \
- (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK14)) || \
- (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK15)) || \
- (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK16)) )
-
-#define DISABLED_MASK_BIT_SET(bit) \
- ( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0 )) || \
- (((bit)>>5)==1 && (1UL<<((bit)&31) & DISABLED_MASK1 )) || \
- (((bit)>>5)==2 && (1UL<<((bit)&31) & DISABLED_MASK2 )) || \
- (((bit)>>5)==3 && (1UL<<((bit)&31) & DISABLED_MASK3 )) || \
- (((bit)>>5)==4 && (1UL<<((bit)&31) & DISABLED_MASK4 )) || \
- (((bit)>>5)==5 && (1UL<<((bit)&31) & DISABLED_MASK5 )) || \
- (((bit)>>5)==6 && (1UL<<((bit)&31) & DISABLED_MASK6 )) || \
- (((bit)>>5)==7 && (1UL<<((bit)&31) & DISABLED_MASK7 )) || \
- (((bit)>>5)==8 && (1UL<<((bit)&31) & DISABLED_MASK8 )) || \
- (((bit)>>5)==9 && (1UL<<((bit)&31) & DISABLED_MASK9 )) || \
- (((bit)>>5)==10 && (1UL<<((bit)&31) & DISABLED_MASK10)) || \
- (((bit)>>5)==11 && (1UL<<((bit)&31) & DISABLED_MASK11)) || \
- (((bit)>>5)==12 && (1UL<<((bit)&31) & DISABLED_MASK12)) || \
- (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK13)) || \
- (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK14)) || \
- (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK15)) || \
- (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK16)) )
-
-#define cpu_has(c, bit) \
- (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
- test_cpu_cap(c, bit))
-
#define this_cpu_has(bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
@@ -105,8 +62,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define cpu_feature_enabled(bit) \
(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
-#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
-
#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
#define setup_clear_cpu_cap(bit) do { \
@@ -118,69 +73,6 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
-/*
- * Static testing of CPU features. Used the same as boot_cpu_has().
- * These will statically patch the target code for additional
- * performance.
- */
-static __always_inline __pure bool _static_cpu_has(u16 bit)
-{
- asm_volatile_goto("1: jmp 6f\n"
- "2:\n"
- ".skip -(((5f-4f) - (2b-1b)) > 0) * "
- "((5f-4f) - (2b-1b)),0x90\n"
- "3:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 4f - .\n" /* repl offset */
- " .word %P1\n" /* always replace */
- " .byte 3b - 1b\n" /* src len */
- " .byte 5f - 4f\n" /* repl len */
- " .byte 3b - 2b\n" /* pad len */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "4: jmp %l[t_no]\n"
- "5:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 0\n" /* no replacement */
- " .word %P0\n" /* feature bit */
- " .byte 3b - 1b\n" /* src len */
- " .byte 0\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .altinstr_aux,\"ax\"\n"
- "6:\n"
- " testb %[bitnum],%[cap_byte]\n"
- " jnz %l[t_yes]\n"
- " jmp %l[t_no]\n"
- ".previous\n"
- : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
- [bitnum] "i" (1 << (bit & 7)),
- [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
- : : t_yes, t_no);
- t_yes:
- return true;
- t_no:
- return false;
-}
-
-#define static_cpu_has(bit) \
-( \
- __builtin_constant_p(boot_cpu_has(bit)) ? \
- boot_cpu_has(bit) : \
- _static_cpu_has(bit) \
-)
-#else
-/*
- * Fall back to dynamic for gcc versions which don't support asm goto. Should be
- * a minority now anyway.
- */
-#define static_cpu_has(bit) boot_cpu_has(bit)
-#endif
-
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit))
#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit))
diff --git a/arch/x86/include/asm/cpuinfo.h b/arch/x86/include/asm/cpuinfo.h
new file mode 100644
index 000000000000..a6632044f199
--- /dev/null
+++ b/arch/x86/include/asm/cpuinfo.h
@@ -0,0 +1,65 @@
+#ifndef _ASM_X86_CPUINFO_H_
+#define _ASM_X86_CPUINFO_H_
+
+/*
+ * CPU type and hardware bug flags. Kept separately for each CPU.
+ * Members of this structure are referenced in head.S, so think twice
+ * before touching them. [mj]
+ */
+struct cpuinfo_x86 {
+ __u8 x86; /* CPU family */
+ __u8 x86_vendor; /* CPU vendor */
+ __u8 x86_model;
+ __u8 x86_mask;
+#ifdef CONFIG_X86_32
+ char wp_works_ok; /* It doesn't on 386's */
+
+ /* Problems on some 486Dx4's and old 386's: */
+ char rfu;
+ char pad0;
+ char pad1;
+#else
+ /* Number of 4K pages in DTLB/ITLB combined(in pages): */
+ int x86_tlbsize;
+#endif
+ __u8 x86_virt_bits;
+ __u8 x86_phys_bits;
+ /* CPUID returned core id bits: */
+ __u8 x86_coreid_bits;
+ /* Max extended CPUID function supported: */
+ __u32 extended_cpuid_level;
+ /* Maximum supported CPUID level, -1=no CPUID: */
+ int cpuid_level;
+ __u32 x86_capability[NCAPINTS + NBUGINTS];
+ char x86_vendor_id[16];
+ char x86_model_id[64];
+ /* in KB - valid for CPUS which support this call: */
+ int x86_cache_size;
+ int x86_cache_alignment; /* In bytes */
+ /* Cache QoS architectural values: */
+ int x86_cache_max_rmid; /* max index */
+ int x86_cache_occ_scale; /* scale to bytes */
+ int x86_power;
+ unsigned long loops_per_jiffy;
+ /* cpuid returned max cores value: */
+ u16 x86_max_cores;
+ u16 apicid;
+ u16 initial_apicid;
+ u16 x86_clflush_size;
+ /* number of cores as seen by the OS: */
+ u16 booted_cores;
+ /* Physical processor id: */
+ u16 phys_proc_id;
+ /* Logical processor id: */
+ u16 logical_proc_id;
+ /* Core id: */
+ u16 cpu_core_id;
+ /* Index into per_cpu list: */
+ u16 cpu_index;
+ u32 microcode;
+};
+
+extern struct cpuinfo_x86 boot_cpu_data;
+extern struct cpuinfo_x86 new_cpu_data;
+
+#endif /* _ASM_X86_CPUINFO_H_ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 62c6cc3cc5d3..6f6555b20e3d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -22,6 +22,7 @@ struct vm86;
#include <asm/nops.h>
#include <asm/special_insns.h>
#include <asm/fpu/types.h>
+#include <asm/cpuinfo.h>
#include <linux/personality.h>
#include <linux/cache.h>
@@ -78,65 +79,6 @@ extern u16 __read_mostly tlb_lld_2m[NR_INFO];
extern u16 __read_mostly tlb_lld_4m[NR_INFO];
extern u16 __read_mostly tlb_lld_1g[NR_INFO];
-/*
- * CPU type and hardware bug flags. Kept separately for each CPU.
- * Members of this structure are referenced in head.S, so think twice
- * before touching them. [mj]
- */
-
-struct cpuinfo_x86 {
- __u8 x86; /* CPU family */
- __u8 x86_vendor; /* CPU vendor */
- __u8 x86_model;
- __u8 x86_mask;
-#ifdef CONFIG_X86_32
- char wp_works_ok; /* It doesn't on 386's */
-
- /* Problems on some 486Dx4's and old 386's: */
- char rfu;
- char pad0;
- char pad1;
-#else
- /* Number of 4K pages in DTLB/ITLB combined(in pages): */
- int x86_tlbsize;
-#endif
- __u8 x86_virt_bits;
- __u8 x86_phys_bits;
- /* CPUID returned core id bits: */
- __u8 x86_coreid_bits;
- /* Max extended CPUID function supported: */
- __u32 extended_cpuid_level;
- /* Maximum supported CPUID level, -1=no CPUID: */
- int cpuid_level;
- __u32 x86_capability[NCAPINTS + NBUGINTS];
- char x86_vendor_id[16];
- char x86_model_id[64];
- /* in KB - valid for CPUS which support this call: */
- int x86_cache_size;
- int x86_cache_alignment; /* In bytes */
- /* Cache QoS architectural values: */
- int x86_cache_max_rmid; /* max index */
- int x86_cache_occ_scale; /* scale to bytes */
- int x86_power;
- unsigned long loops_per_jiffy;
- /* cpuid returned max cores value: */
- u16 x86_max_cores;
- u16 apicid;
- u16 initial_apicid;
- u16 x86_clflush_size;
- /* number of cores as seen by the OS: */
- u16 booted_cores;
- /* Physical processor id: */
- u16 phys_proc_id;
- /* Logical processor id: */
- u16 logical_proc_id;
- /* Core id: */
- u16 cpu_core_id;
- /* Index into per_cpu list: */
- u16 cpu_index;
- u32 microcode;
-};
-
#define X86_VENDOR_INTEL 0
#define X86_VENDOR_CYRIX 1
#define X86_VENDOR_AMD 2
@@ -151,9 +93,6 @@ struct cpuinfo_x86 {
/*
* capabilities of CPUs
*/
-extern struct cpuinfo_x86 boot_cpu_data;
-extern struct cpuinfo_x86 new_cpu_data;
-
extern struct tss_struct doublefault_tss;
extern __u32 cpu_caps_cleared[NCAPINTS];
extern __u32 cpu_caps_set[NCAPINTS];
diff --git a/arch/x86/include/asm/static_cpu_has.h b/arch/x86/include/asm/static_cpu_has.h
new file mode 100644
index 000000000000..648ada0c7ffe
--- /dev/null
+++ b/arch/x86/include/asm/static_cpu_has.h
@@ -0,0 +1,116 @@
+#ifndef _ASM_X86_STATIC_CPU_HAS_H
+#define _ASM_X86_STATIC_CPU_HAS_H
+
+#include <asm/cpuinfo.h>
+
+#define test_cpu_cap(c, bit) \
+ test_bit(bit, (unsigned long *)((c)->x86_capability))
+
+#define REQUIRED_MASK_BIT_SET(bit) \
+ ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0 )) || \
+ (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1 )) || \
+ (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2 )) || \
+ (((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3 )) || \
+ (((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4 )) || \
+ (((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5 )) || \
+ (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6 )) || \
+ (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7 )) || \
+ (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8 )) || \
+ (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9 )) || \
+ (((bit)>>5)==10 && (1UL<<((bit)&31) & REQUIRED_MASK10)) || \
+ (((bit)>>5)==11 && (1UL<<((bit)&31) & REQUIRED_MASK11)) || \
+ (((bit)>>5)==12 && (1UL<<((bit)&31) & REQUIRED_MASK12)) || \
+ (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK13)) || \
+ (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK14)) || \
+ (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK15)) || \
+ (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK16)) )
+
+#define DISABLED_MASK_BIT_SET(bit) \
+ ( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0 )) || \
+ (((bit)>>5)==1 && (1UL<<((bit)&31) & DISABLED_MASK1 )) || \
+ (((bit)>>5)==2 && (1UL<<((bit)&31) & DISABLED_MASK2 )) || \
+ (((bit)>>5)==3 && (1UL<<((bit)&31) & DISABLED_MASK3 )) || \
+ (((bit)>>5)==4 && (1UL<<((bit)&31) & DISABLED_MASK4 )) || \
+ (((bit)>>5)==5 && (1UL<<((bit)&31) & DISABLED_MASK5 )) || \
+ (((bit)>>5)==6 && (1UL<<((bit)&31) & DISABLED_MASK6 )) || \
+ (((bit)>>5)==7 && (1UL<<((bit)&31) & DISABLED_MASK7 )) || \
+ (((bit)>>5)==8 && (1UL<<((bit)&31) & DISABLED_MASK8 )) || \
+ (((bit)>>5)==9 && (1UL<<((bit)&31) & DISABLED_MASK9 )) || \
+ (((bit)>>5)==10 && (1UL<<((bit)&31) & DISABLED_MASK10)) || \
+ (((bit)>>5)==11 && (1UL<<((bit)&31) & DISABLED_MASK11)) || \
+ (((bit)>>5)==12 && (1UL<<((bit)&31) & DISABLED_MASK12)) || \
+ (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK13)) || \
+ (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK14)) || \
+ (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK15)) || \
+ (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK16)) )
+
+#define cpu_has(c, bit) \
+ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
+ test_cpu_cap(c, bit))
+
+#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
+
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
+/*
+ * Static testing of CPU features. Used the same as boot_cpu_has().
+ * These will statically patch the target code for additional
+ * performance.
+ */
+static __always_inline __pure bool _static_cpu_has(u16 bit)
+{
+ asm_volatile_goto("1: jmp 6f\n"
+ "2:\n"
+ ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+ "((5f-4f) - (2b-1b)),0x90\n"
+ "3:\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 4f - .\n" /* repl offset */
+ " .word %P1\n" /* always replace */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 5f - 4f\n" /* repl len */
+ " .byte 3b - 2b\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_replacement,\"ax\"\n"
+ "4: jmp %l[t_no]\n"
+ "5:\n"
+ ".previous\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 0\n" /* no replacement */
+ " .word %P0\n" /* feature bit */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 0\n" /* repl len */
+ " .byte 0\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ " testb %[bitnum],%[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".previous\n"
+ : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
+ [bitnum] "i" (1 << (bit & 7)),
+ [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+ : : t_yes, t_no);
+ t_yes:
+ return true;
+ t_no:
+ return false;
+}
+
+#define static_cpu_has(bit) \
+( \
+ __builtin_constant_p(boot_cpu_has(bit)) ? \
+ boot_cpu_has(bit) : \
+ _static_cpu_has(bit) \
+)
+#else
+/*
+ * Fall back to dynamic for gcc versions which don't support asm goto. Should be
+ * a minority now anyway.
+ */
+#define static_cpu_has(bit) boot_cpu_has(bit)
+#endif
+
+#endif /* _ASM_X86_STATIC_CPU_HAS_H */
diff --git a/lib/Makefile b/lib/Makefile
index a65e9a861535..55ad20701dc0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -15,9 +15,6 @@ KCOV_INSTRUMENT_rbtree.o := n
KCOV_INSTRUMENT_list_debug.o := n
KCOV_INSTRUMENT_debugobjects.o := n
KCOV_INSTRUMENT_dynamic_debug.o := n
-# Kernel does not boot if we instrument this file as it uses custom calling
-# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
-KCOV_INSTRUMENT_hweight.o := n
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o dump_stack.o timerqueue.o\
@@ -72,8 +69,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
-GCOV_PROFILE_hweight.o := n
-CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
obj-$(CONFIG_BTREE) += btree.o
--
2.7.3
SUSE Linux GmbH, GF: Felix ImendÃrffer, Jane Smithard, Graham Norton, HRB 21284 (AG NÃrnberg)
--