[PATCH] x86/crc32: improve crc32c_arch() code generation with clang

From: Eric Biggers
Date: Mon Feb 10 2025 - 16:08:47 EST


From: Eric Biggers <ebiggers@xxxxxxxxxx>

crc32c_arch() is affected by
https://github.com/llvm/llvm-project/issues/20571 where clang
unnecessarily spills the inputs to "rm"-constrained operands to the
stack. Replace "rm" with ASM_INPUT_RM, which partially works around
this by expanding to "r" when the compiler is clang. This results in
better code generation with clang, though still not optimal.

Signed-off-by: Eric Biggers <ebiggers@xxxxxxxxxx>
---

This applies to
https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git/log/?h=crc-next

arch/x86/lib/crc32-glue.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c
index 9c3f9c1b7bb9..4b4721176799 100644
--- a/arch/x86/lib/crc32-glue.c
+++ b/arch/x86/lib/crc32-glue.c
@@ -53,14 +53,14 @@ u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
return crc;
}

for (num_longs = len / sizeof(unsigned long);
num_longs != 0; num_longs--, p += sizeof(unsigned long))
- asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));
+ asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));

for (len %= sizeof(unsigned long); len; len--, p++)
- asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));
+ asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));

return crc;
}
EXPORT_SYMBOL(crc32c_arch);


base-commit: 4ffd50862d41e5aaf2e749efa354afaa1317c309
--
2.48.1