[PATCH next] i386: Remove string functions that use 'rep scasb'

From: david . laight . linux

Date: Fri Mar 27 2026 - 15:58:14 EST


From: David Laight <david.laight.linux@xxxxxxxxx>

The fixed overhead of all the 'rep xxx' instructions is rather more
than one might expect.
While 'rep movs' is getting better on more recent CPUs, the same is
not true for 'rep scasb'.
On my Zen-5 it has a fixed overhead of 150 clocks and then takes 3
clocks for each byte.
I've not measured any Intel CPU, but the cost might be 'only' 40 + 2n.

Remove the asm versions of strcat(), strncat(), strlen(), memchr()
and memscan(); the generic C versions will be faster.

It is quite likely that all these functions are slower than the generic
code on pretty much all CPUs since the 486.

Signed-off-by: David Laight <david.laight.linux@xxxxxxxxx>
---
arch/x86/include/asm/string_32.h | 18 -------
arch/x86/lib/string_32.c | 89 --------------------------------
2 files changed, 107 deletions(-)

diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index e9cce169bb4c..b245db5d7f3c 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -12,12 +12,6 @@ extern char *strcpy(char *dest, const char *src);
#define __HAVE_ARCH_STRNCPY
extern char *strncpy(char *dest, const char *src, size_t count);

-#define __HAVE_ARCH_STRCAT
-extern char *strcat(char *dest, const char *src);
-
-#define __HAVE_ARCH_STRNCAT
-extern char *strncat(char *dest, const char *src, size_t count);
-
#define __HAVE_ARCH_STRCMP
extern int strcmp(const char *cs, const char *ct);

@@ -27,9 +21,6 @@ extern int strncmp(const char *cs, const char *ct, size_t count);
#define __HAVE_ARCH_STRCHR
extern char *strchr(const char *s, int c);

-#define __HAVE_ARCH_STRLEN
-extern size_t strlen(const char *s);
-
static __always_inline void *__memcpy(void *to, const void *from, size_t n)
{
int d0, d1, d2;
@@ -159,9 +150,6 @@ extern int memcmp(const void *, const void *, size_t);
#define memcmp __builtin_memcmp
#endif

-#define __HAVE_ARCH_MEMCHR
-extern void *memchr(const void *cs, int c, size_t count);
-
static inline void *__memset_generic(void *s, char c, size_t count)
{
int d0, d1;
@@ -216,12 +204,6 @@ static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
return s;
}

-/*
- * find the first occurrence of byte 'c', or 1 past the area if none
- */
-#define __HAVE_ARCH_MEMSCAN
-extern void *memscan(void *addr, int c, size_t size);
-
#endif /* __KERNEL__ */

#endif /* _ASM_X86_STRING_32_H */
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index f87ec24fa579..3602e808b584 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c
@@ -49,46 +49,6 @@ char *strncpy(char *dest, const char *src, size_t count)
EXPORT_SYMBOL(strncpy);
#endif

-#ifdef __HAVE_ARCH_STRCAT
-char *strcat(char *dest, const char *src)
-{
- int d0, d1, d2, d3;
- asm volatile("repne scasb\n\t"
- "decl %1\n"
- "1:\tlodsb\n\t"
- "stosb\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu) : "memory");
- return dest;
-}
-EXPORT_SYMBOL(strcat);
-#endif
-
-#ifdef __HAVE_ARCH_STRNCAT
-char *strncat(char *dest, const char *src, size_t count)
-{
- int d0, d1, d2, d3;
- asm volatile("repne scasb\n\t"
- "decl %1\n\t"
- "movl %8,%3\n"
- "1:\tdecl %3\n\t"
- "js 2f\n\t"
- "lodsb\n\t"
- "stosb\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n"
- "2:\txorl %2,%2\n\t"
- "stosb"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu), "g" (count)
- : "memory");
- return dest;
-}
-EXPORT_SYMBOL(strncat);
-#endif
-
#ifdef __HAVE_ARCH_STRCMP
int strcmp(const char *cs, const char *ct)
{
@@ -159,55 +119,6 @@ char *strchr(const char *s, int c)
EXPORT_SYMBOL(strchr);
#endif

-#ifdef __HAVE_ARCH_STRLEN
-size_t strlen(const char *s)
-{
- int d0;
- size_t res;
- asm volatile("repne scasb"
- : "=c" (res), "=&D" (d0)
- : "1" (s), "a" (0), "0" (0xffffffffu)
- : "memory");
- return ~res - 1;
-}
-EXPORT_SYMBOL(strlen);
-#endif
-
-#ifdef __HAVE_ARCH_MEMCHR
-void *memchr(const void *cs, int c, size_t count)
-{
- int d0;
- void *res;
- if (!count)
- return NULL;
- asm volatile("repne scasb\n\t"
- "je 1f\n\t"
- "movl $1,%0\n"
- "1:\tdecl %0"
- : "=D" (res), "=&c" (d0)
- : "a" (c), "0" (cs), "1" (count)
- : "memory");
- return res;
-}
-EXPORT_SYMBOL(memchr);
-#endif
-
-#ifdef __HAVE_ARCH_MEMSCAN
-void *memscan(void *addr, int c, size_t size)
-{
- if (!size)
- return addr;
- asm volatile("repnz scasb\n\t"
- "jnz 1f\n\t"
- "dec %%edi\n"
- "1:"
- : "=D" (addr), "=c" (size)
- : "0" (addr), "1" (size), "a" (c)
- : "memory");
- return addr;
-}
-EXPORT_SYMBOL(memscan);
-#endif

#ifdef __HAVE_ARCH_STRNLEN
size_t strnlen(const char *s, size_t count)
--
2.39.5