[PATCH 2/5] x86_64, -march=native: POPCNT support

From: Alexey Dobriyan
Date: Thu Jul 04 2019 - 16:48:14 EST


Detect POPCNT instruction support and inline hweigth*() functions
if it is supported by CPU.

Detect POPCNT at boot time and conditionally refuse to boot.

Signed-off-by: Alexey Dobriyan <adobriyan@xxxxxxxxx>
---
arch/x86/include/asm/arch_hweight.h | 24 ++++++++++++++++++++++++
arch/x86/include/asm/segment.h | 1 +
arch/x86/kernel/verify_cpu.S | 8 ++++++++
arch/x86/lib/Makefile | 5 ++++-
include/linux/bitops.h | 2 ++
lib/Makefile | 2 ++
scripts/kconfig/cpuid.c | 7 +++++++
scripts/march-native.sh | 2 ++
8 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index ba88edd0d58b..3797aa57baa5 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -2,6 +2,28 @@
#ifndef _ASM_X86_HWEIGHT_H
#define _ASM_X86_HWEIGHT_H

+#ifdef CONFIG_MARCH_NATIVE_POPCNT
+static inline unsigned int __arch_hweight64(uint64_t x)
+{
+ return __builtin_popcountll(x);
+}
+
+static inline unsigned int __arch_hweight32(uint32_t x)
+{
+ return __builtin_popcount(x);
+}
+
+static inline unsigned int __arch_hweight16(uint16_t x)
+{
+ return __builtin_popcount(x);
+}
+
+static inline unsigned int __arch_hweight8(uint8_t x)
+{
+ return __builtin_popcount(x);
+}
+#else
+
#include <asm/cpufeatures.h>

#ifdef CONFIG_64BIT
@@ -53,3 +75,5 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
#endif /* CONFIG_X86_32 */

#endif
+
+#endif
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index ac3892920419..d314c6b9b632 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -4,6 +4,7 @@

#include <linux/const.h>
#include <asm/alternative.h>
+#include <asm/cpufeatures.h>

/*
* Constructor for a conventional segment GDT (or LDT) entry.
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index a024c4f7ba56..a9be8904faa3 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -134,6 +134,14 @@ ENTRY(verify_cpu)
movl $1,%eax
ret
.Lverify_cpu_sse_ok:
+
+#ifdef CONFIG_MARCH_NATIVE_POPCNT
+ mov $1, %eax
+ cpuid
+ bt $23, %ecx
+ jnc .Lverify_cpu_no_longmode
+#endif
+
popf # Restore caller passed flags
xorl %eax, %eax
ret
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 5246db42de45..7dc0e71b0ef3 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -40,7 +40,10 @@ lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RETPOLINE) += retpoline.o

-obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
+obj-y += msr.o msr-reg.o msr-reg-export.o
+ifneq ($(CONFIG_MARCH_NATIVE_POPCNT),y)
+ obj-y += hweight.o
+endif
obj-y += iomem.o

ifeq ($(CONFIG_X86_32),y)
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index cf074bce3eb3..655b120bba66 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -7,10 +7,12 @@
#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long))

+#ifndef CONFIG_MARCH_NATIVE_POPCNT
extern unsigned int __sw_hweight8(unsigned int w);
extern unsigned int __sw_hweight16(unsigned int w);
extern unsigned int __sw_hweight32(unsigned int w);
extern unsigned long __sw_hweight64(__u64 w);
+#endif

/*
* Include this here because some architectures need generic_ffs/fls in
diff --git a/lib/Makefile b/lib/Makefile
index fb7697031a79..87e7b974cab1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -112,7 +112,9 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o

obj-y += logic_pio.o

+ifneq ($(CONFIG_MARCH_NATIVE_POPCNT),y)
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
+endif

obj-$(CONFIG_BTREE) += btree.o
obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o
diff --git a/scripts/kconfig/cpuid.c b/scripts/kconfig/cpuid.c
index 81b292382e26..9efc0d9464d8 100644
--- a/scripts/kconfig/cpuid.c
+++ b/scripts/kconfig/cpuid.c
@@ -43,6 +43,8 @@ static inline void cpuid2(uint32_t eax0, uint32_t ecx0, uint32_t *eax, uint32_t
);
}

+static bool popcnt = false;
+
static uint32_t eax0_max;

static void intel(void)
@@ -52,6 +54,10 @@ static void intel(void)
if (eax0_max >= 1) {
cpuid(1, &eax, &ecx, &edx, &ebx);
// printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx);
+
+ if (ecx & (1 << 23)) {
+ popcnt = true;
+ }
}
}

@@ -72,6 +78,7 @@ int main(int argc, char *argv[])
}

#define _(x) if (streq(opt, #x)) return x ? EXIT_SUCCESS : EXIT_FAILURE
+ _(popcnt);
#undef _

return EXIT_FAILURE;
diff --git a/scripts/march-native.sh b/scripts/march-native.sh
index 29a33c80b62b..c3059f93ed2b 100755
--- a/scripts/march-native.sh
+++ b/scripts/march-native.sh
@@ -41,6 +41,8 @@ COLLECT_GCC_OPTIONS=$(
)
echo "-march=native: $COLLECT_GCC_OPTIONS"

+"$CPUID" popcnt && option "CONFIG_MARCH_NATIVE_POPCNT"
+
for i in $COLLECT_GCC_OPTIONS; do
case $i in
*/cc1|-E|-quiet|-v|/dev/null|--param|-fstack-protector*)
--
2.21.0