[PATCH v2 01/10] crypto: x86/aegis128 - access 32-bit arguments as 32-bit

From: Eric Biggers
Date: Wed Oct 16 2024 - 20:02:29 EST


From: Eric Biggers <ebiggers@xxxxxxxxxx>

Fix the AEGIS assembly code to access 'unsigned int' arguments as 32-bit
values instead of 64-bit, since the upper bits of the corresponding
64-bit registers are not guaranteed to be zero.

Note: there haven't been any reports of this bug actually causing
incorrect behavior. Neither gcc nor clang guarantee zero-extension to
64 bits, but zero-extension is likely to happen in practice because most
instructions that operate on 32-bit registers zero-extend to 64 bits.

Fixes: 1d373d4e8e15 ("crypto: x86 - Add optimized AEGIS implementations")
Cc: stable@xxxxxxxxxxxxxxx
Reviewed-by: Ondrej Mosnacek <omosnace@xxxxxxxxxx>
Signed-off-by: Eric Biggers <ebiggers@xxxxxxxxxx>
---
arch/x86/crypto/aegis128-aesni-asm.S | 29 ++++++++++++++--------------
1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index ad7f4c8916256..2de859173940e 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -19,11 +19,11 @@
#define MSG %xmm5
#define T0 %xmm6
#define T1 %xmm7

#define STATEP %rdi
-#define LEN %rsi
+#define LEN %esi
#define SRC %rdx
#define DST %rcx

.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
.align 16
@@ -74,50 +74,50 @@
*/
SYM_FUNC_START_LOCAL(__load_partial)
xor %r9d, %r9d
pxor MSG, MSG

- mov LEN, %r8
+ mov LEN, %r8d
and $0x1, %r8
jz .Lld_partial_1

- mov LEN, %r8
+ mov LEN, %r8d
and $0x1E, %r8
add SRC, %r8
mov (%r8), %r9b

.Lld_partial_1:
- mov LEN, %r8
+ mov LEN, %r8d
and $0x2, %r8
jz .Lld_partial_2

- mov LEN, %r8
+ mov LEN, %r8d
and $0x1C, %r8
add SRC, %r8
shl $0x10, %r9
mov (%r8), %r9w

.Lld_partial_2:
- mov LEN, %r8
+ mov LEN, %r8d
and $0x4, %r8
jz .Lld_partial_4

- mov LEN, %r8
+ mov LEN, %r8d
and $0x18, %r8
add SRC, %r8
shl $32, %r9
mov (%r8), %r8d
xor %r8, %r9

.Lld_partial_4:
movq %r9, MSG

- mov LEN, %r8
+ mov LEN, %r8d
and $0x8, %r8
jz .Lld_partial_8

- mov LEN, %r8
+ mov LEN, %r8d
and $0x10, %r8
add SRC, %r8
pslldq $8, MSG
movq (%r8), T0
pxor T0, MSG
@@ -137,11 +137,11 @@ SYM_FUNC_END(__load_partial)
* %r8
* %r9
* %r10
*/
SYM_FUNC_START_LOCAL(__store_partial)
- mov LEN, %r8
+ mov LEN, %r8d
mov DST, %r9

movq T0, %r10

cmp $8, %r8
@@ -675,11 +675,11 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)

movdqa MSG, T0
call __store_partial

/* mask with byte count: */
- movq LEN, T0
+ movd LEN, T0
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
movdqa .Laegis128_counter(%rip), T1
@@ -700,11 +700,12 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
RET
SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)

/*
* void crypto_aegis128_aesni_final(void *state, void *tag_xor,
- * u64 assoclen, u64 cryptlen);
+ * unsigned int assoclen,
+ * unsigned int cryptlen);
*/
SYM_FUNC_START(crypto_aegis128_aesni_final)
FRAME_BEGIN

/* load the state: */
@@ -713,12 +714,12 @@ SYM_FUNC_START(crypto_aegis128_aesni_final)
movdqu 0x20(STATEP), STATE2
movdqu 0x30(STATEP), STATE3
movdqu 0x40(STATEP), STATE4

/* prepare length block: */
- movq %rdx, MSG
- movq %rcx, T0
+ movd %edx, MSG
+ movd %ecx, T0
pslldq $8, T0
pxor T0, MSG
psllq $3, MSG /* multiply by 8 (to get bit count) */

pxor STATE3, MSG
--
2.47.0