[PATCH] crypto/x86: Use CRC32 mnemonic in crc32c-intel_glue.c

From: Uros Bizjak
Date: Wed Aug 05 2020 - 15:51:09 EST


Current minimum required version of binutils is 2.23,
which supports CRC32 instruction mnemonic.

Replace the byte-wise specification of CRC32 with this proper mnemonic.
The compiler is now able to pass memory operand to the instruction,
so there is no need for a temporary register anymore.

Some examples of the improvement:

12a: 48 8b 08 mov (%rax),%rcx
12d: f2 48 0f 38 f1 f1 crc32q %rcx,%rsi
133: 48 83 c0 08 add $0x8,%rax
137: 48 39 d0 cmp %rdx,%rax
13a: 75 ee jne 12a <crc32c_intel_update+0x1a>

to:

125: f2 48 0f 38 f1 06 crc32q (%rsi),%rax
12b: 48 83 c6 08 add $0x8,%rsi
12f: 48 39 d6 cmp %rdx,%rsi
132: 75 f1 jne 125 <crc32c_intel_update+0x15>

and:

146: 0f b6 08 movzbl (%rax),%ecx
149: f2 0f 38 f0 f1 crc32b %cl,%esi
14e: 48 83 c0 01 add $0x1,%rax
152: 48 39 d0 cmp %rdx,%rax
155: 75 ef jne 146 <crc32c_intel_update+0x36>

to:

13b: f2 0f 38 f0 02 crc32b (%rdx),%eax
140: 48 83 c2 01 add $0x1,%rdx
144: 48 39 ca cmp %rcx,%rdx
147: 75 f2 jne 13b <crc32c_intel_update+0x2b>

As the compiler has some more freedom w.r.t. register allocation,
there is also a couple of reg-reg moves removed.

There are no hidden states for CRC32 insn, so there is no need to mark
assembly as volatile.

Signed-off-by: Uros Bizjak <ubizjak@xxxxxxxxx>
CC: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx>
CC: "David S. Miller" <davem@xxxxxxxxxxxxx>
CC: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CC: Ingo Molnar <mingo@xxxxxxxxxx>
CC: Borislav Petkov <bp@xxxxxxxxx>
CC: "H. Peter Anvin" <hpa@xxxxxxxxx>
---
arch/x86/crypto/crc32c-intel_glue.c | 24 ++++++++----------------
1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index d2d069bd459b..3a34b2351559 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -27,12 +27,6 @@

#define SCALE_F sizeof(unsigned long)

-#ifdef CONFIG_X86_64
-#define REX_PRE "0x48, "
-#else
-#define REX_PRE
-#endif
-
#ifdef CONFIG_X86_64
/*
* use carryless multiply version of crc32c when buffer
@@ -48,11 +42,8 @@ asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
while (length--) {
- __asm__ __volatile__(
- ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
- :"=S"(crc)
- :"0"(crc), "c"(*data)
- );
+ asm("crc32b %1, %0"
+ : "+r" (crc) : "rm" (*data));
data++;
}

@@ -66,11 +57,12 @@ static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len
unsigned long *ptmp = (unsigned long *)p;

while (iquotient--) {
- __asm__ __volatile__(
- ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
- :"=S"(crc)
- :"0"(crc), "c"(*ptmp)
- );
+#ifdef CONFIG_X86_64
+ asm("crc32q %1, %q0"
+#else
+ asm("crc32l %1, %0"
+#endif
+ : "+r" (crc) : "rm" (*ptmp));
ptmp++;
}

--
2.26.2