[PATCH v2 14/18] amd64: saner handling of odd address in csum_partial()

From: Al Viro
Date: Mon Dec 04 2023 - 21:25:32 EST


all we want there is to have return value congruent to
result * 256 modulo 0xffff; no need to convert from
32bit to 16bit (i.e. take it modulo 0xffff) first -
cyclic shift of 32bit value by 8 bits (in either direction)
will work.

Kills the from32to16() helper and yields better code...

Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
---
arch/x86/lib/csum-partial_64.c | 16 ++--------------
1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
index 5e877592a7b3..192d4772c2a3 100644
--- a/arch/x86/lib/csum-partial_64.c
+++ b/arch/x86/lib/csum-partial_64.c
@@ -11,25 +11,13 @@
#include <net/checksum.h>
#include <asm/word-at-a-time.h>

-static inline unsigned short from32to16(unsigned a)
-{
- unsigned short b = a >> 16;
- asm("addw %w2,%w0\n\t"
- "adcw $0,%w0\n"
- : "=r" (b)
- : "0" (b), "r" (a));
- return b;
-}
-
static inline __wsum csum_tail(u64 temp64, int odd)
{
unsigned int result;

result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff);
- if (unlikely(odd)) {
- result = from32to16(result);
- result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
- }
+ if (unlikely(odd))
+ result = rol32(result, 8);
return (__force __wsum)result;
}

--
2.39.2