[PATCH v2 10/30] Add x86-specific parity functions

From: zengzhaoxiu
Date: Wed Apr 06 2016 - 05:21:22 EST


From: Zhaoxiu Zeng <zhaoxiu.zeng@xxxxxxxxx>

Implement the x86 parity helpers with the alternatives mechanism; the approach is lifted from arch_hweight.

Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@xxxxxxxxx>
---
arch/x86/include/asm/arch_hweight.h | 5 ++
arch/x86/include/asm/arch_parity.h | 102 ++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/bitops.h | 4 +-
arch/x86/lib/Makefile | 8 +++
arch/x86/lib/parity.c | 32 ++++++++++++
5 files changed, 150 insertions(+), 1 deletion(-)
create mode 100644 arch/x86/include/asm/arch_parity.h
create mode 100644 arch/x86/lib/parity.c

diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 02e799f..c79d50d 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -63,4 +63,9 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
}
#endif /* CONFIG_X86_32 */

+#undef POPCNT32
+#undef POPCNT64
+#undef REG_IN
+#undef REG_OUT
+
#endif
diff --git a/arch/x86/include/asm/arch_parity.h b/arch/x86/include/asm/arch_parity.h
new file mode 100644
index 0000000..09463fd
--- /dev/null
+++ b/arch/x86/include/asm/arch_parity.h
@@ -0,0 +1,100 @@
+#ifndef _ASM_X86_PARITY_H
+#define _ASM_X86_PARITY_H
+
+#include <asm/cpufeatures.h>
+
+/*
+ * Hand-encoded POPCNT opcodes and the asm register constraints that go
+ * with them. REG_IN is the constraint used for the input operand and
+ * REG_OUT the one used for the output operand of the asm statements
+ * below; they have to name the registers encoded in the opcode bytes.
+ */
+#ifdef CONFIG_64BIT
+/* popcnt %edi, %eax -- redundant REX prefix for alignment */
+#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
+/* popcnt %rdi, %rax */
+#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
+#define REG_IN "D"
+#define REG_OUT "a"
+#else
+/* popcnt %eax, %eax */
+#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0"
+#define REG_IN "a"
+#define REG_OUT "a"
+#endif
+
+/*
+ * __sw_parityXX are called from within the alternatives below
+ * and callee-clobbered registers need to be taken care of. See
+ * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
+ * compiler switches.
+ */
+unsigned int __sw_parity32(unsigned int w);
+#ifndef CONFIG_X86_32
+unsigned int __sw_parity64(__u64 w);
+#endif
+
+/*
+ * Parity of the low 4 bits of @w: returns 1 if an odd number of them are
+ * set, 0 otherwise.
+ */
+static inline unsigned int __arch_parity4(unsigned int w)
+{
+ /* Only the low byte is written by setpo, so start from zero. */
+ unsigned int res = 0;
+
+ /* Mask to the low nibble with test, then read the parity flag. */
+ asm("test $0xf, %1; setpo %b0"
+ : "+q" (res)
+ : "r" (w)
+ : "cc");
+
+ return res;
+}
+
+/*
+ * Parity of the low 8 bits of @w: returns 1 if an odd number of them are
+ * set, 0 otherwise.
+ */
+static inline unsigned int __arch_parity8(unsigned int w)
+{
+ /* Only the low byte is written by setpo, so start from zero. */
+ unsigned int res = 0;
+
+ /*
+  * test sets the parity flag from the low byte of its result, so
+  * testing @w against itself yields the parity of its low 8 bits.
+  */
+ asm("test %1, %1; setpo %b0"
+ : "+q" (res)
+ : "r" (w)
+ : "cc");
+
+ return res;
+}
+
+/*
+ * Parity of the low 16 bits of @w: returns 1 if an odd number of them are
+ * set, 0 otherwise.
+ *
+ * The two low bytes are folded together with XOR so that the parity flag,
+ * which only reflects the low byte of a result, covers all 16 bits.
+ */
+static inline unsigned int __arch_parity16(unsigned int w)
+{
+ /* Only the low byte is written by setpo, so start from zero. */
+ unsigned int res = 0;
+
+ /*
+  * %h1 needs a register with an addressable high byte (a, b, c or d),
+  * hence the "Q" constraint on @w. "q" would permit any integer
+  * register on 64-bit, and most of those have no %h form.
+  */
+ asm("xor %h1, %b1; setpo %b0"
+ : "+q" (res), "+Q" (w)
+ : : "cc");
+
+ return res;
+}
+
+/*
+ * Parity of the 32-bit value @w, returned as a single bit.
+ *
+ * The asm is an ALTERNATIVE keyed on X86_FEATURE_POPCNT. One variant is a
+ * call to the out-of-line __sw_parity32(); the other is the hand-encoded
+ * POPCNT32 instruction followed by "and $1", which keeps only the low bit
+ * of the population count. @w is placed in the REG_IN register and the
+ * result is read from the REG_OUT register for both variants.
+ */
+static __always_inline unsigned int __arch_parity32(unsigned int w)
+{
+ unsigned int res;
+
+ asm(ALTERNATIVE("call __sw_parity32", POPCNT32 "; and $1, %0", X86_FEATURE_POPCNT)
+ : "="REG_OUT (res)
+ : REG_IN (w)
+ : "cc");
+
+ return res;
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * 32-bit build: fold the upper half of @w onto the lower half with XOR and
+ * hand the 32-bit result to __arch_parity32().
+ */
+static inline unsigned long __arch_parity64(__u64 w)
+{
+ u32 lo = (u32)w;
+ u32 hi = (u32)(w >> 32);
+
+ return __arch_parity32(lo ^ hi);
+}
+#else
+/*
+ * Parity of the 64-bit value @w, returned as a single bit.
+ *
+ * Same scheme as __arch_parity32(): an ALTERNATIVE keyed on
+ * X86_FEATURE_POPCNT whose variants are a call to __sw_parity64() and the
+ * hand-encoded POPCNT64 instruction followed by "and $1" to keep only the
+ * low bit of the count. @w goes in via REG_IN, the result comes back in
+ * REG_OUT.
+ */
+static __always_inline unsigned long __arch_parity64(__u64 w)
+{
+ unsigned long res;
+
+ asm(ALTERNATIVE("call __sw_parity64", POPCNT64 "; and $1, %0", X86_FEATURE_POPCNT)
+ : "="REG_OUT (res)
+ : REG_IN (w)
+ : "cc");
+
+ return res;
+}
+#endif /* CONFIG_X86_32 */
+
+#undef POPCNT32
+#undef POPCNT64
+#undef REG_IN
+#undef REG_OUT
+
+#endif
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 7766d1c..f5b0122 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -498,9 +498,11 @@ static __always_inline int fls64(__u64 x)
#include <asm-generic/bitops/sched.h>

#include <asm/arch_hweight.h>
-
#include <asm-generic/bitops/const_hweight.h>

+#include <asm/arch_parity.h>
+#include <asm-generic/bitops/const_parity.h>
+
#include <asm-generic/bitops/le.h>

#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 72a5767..5716295 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -4,6 +4,9 @@

# Produces uninteresting flaky coverage.
KCOV_INSTRUMENT_delay.o := n
+# Kernel does not boot if we instrument this file as it uses custom calling
+# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
+KCOV_INSTRUMENT_parity.o := n

inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
@@ -45,3 +48,8 @@ else
lib-y += copy_user_64.o
lib-y += cmpxchg16b_emu.o
endif
+
+# Build parity.o with the flags from CONFIG_ARCH_HWEIGHT_CFLAGS (with the
+# $(quote) characters removed) and with gcov profiling turned off for it.
+GCOV_PROFILE_parity.o := n
+CFLAGS_parity.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
+obj-y += parity.o
+
diff --git a/arch/x86/lib/parity.c b/arch/x86/lib/parity.c
new file mode 100644
index 0000000..762117b
--- /dev/null
+++ b/arch/x86/lib/parity.c
@@ -0,0 +1,32 @@
+#include <linux/export.h>
+#include <linux/bitops.h>
+
+/*
+ * Software parity of a 32-bit value: returns 1 if @w has an odd number of
+ * set bits, 0 otherwise.
+ */
+unsigned int __sw_parity32(unsigned int w)
+{
+ unsigned int res;
+ /* Fold the upper 16 bits onto the lower 16. */
+ w ^= w >> 16;
+ /*
+  * Fold %ah onto %al so the parity flag covers all remaining bits.
+  * %eax is then zeroed with "mov", which leaves the flags from the xor
+  * intact, before setpo writes the low byte.
+  */
+ asm("xor %%ah, %%al \n"
+ "mov $0, %%eax \n"
+ "setpo %%al \n"
+ : "=a" (res)
+ : "a" (w)
+ : "cc");
+ return res;
+}
+EXPORT_SYMBOL(__sw_parity32);
+
+#ifndef CONFIG_X86_32
+/*
+ * Software parity of a 64-bit value: returns 1 if @w has an odd number of
+ * set bits, 0 otherwise.
+ */
+unsigned int __sw_parity64(__u64 w)
+{
+ /* Fold the upper 32 bits onto the lower 32, then 32 down to 16. */
+ unsigned int res = (unsigned int)w ^ (unsigned int)(w >> 32);
+ res ^= res >> 16;
+ /*
+  * Fold %ah onto %al so the parity flag covers all remaining bits.
+  * %eax is then zeroed with "mov", which leaves the flags from the xor
+  * intact, before setpo writes the low byte.
+  */
+ asm("xor %%ah, %%al \n"
+ "mov $0, %%eax \n"
+ "setpo %%al \n"
+ : "=a" (res)
+ : "a" (res)
+ : "cc");
+ return res;
+}
+EXPORT_SYMBOL(__sw_parity64);
+#endif
--
2.5.0