[PATCH v3 07/13] crypto: arm64/sm4 - simplify sm4_ce_expand_key() of CE implementation

From: Tianjia Zhang
Date: Thu Oct 27 2022 - 02:55:53 EST


Use a 128-bit swap mask and tbl instruction to simplify the implementation
for generating SM4 rkey_dec.

Also fixed the issue of not being wrapped by kernel_neon_begin/end() when
using the sm4_ce_expand_key() function.

Signed-off-by: Tianjia Zhang <tianjia.zhang@xxxxxxxxxxxxxxxxx>
---
arch/arm64/crypto/sm4-ce-core.S | 46 ++++++++++++++++-----------------
arch/arm64/crypto/sm4-ce-glue.c | 2 ++
2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
index 41fc745a8528..9e4b4f01cdf3 100644
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -65,32 +65,23 @@ SYM_FUNC_START(sm4_ce_expand_key)
sm4ekey v6.4s, v5.4s, v30.4s;
sm4ekey v7.4s, v6.4s, v31.4s;

+ adr_l x5, .Lbswap128_mask
+ ld1 {v24.16b}, [x5]
+
st1 {v0.16b-v3.16b}, [x1], #64;
st1 {v4.16b-v7.16b}, [x1];
- rev64 v7.4s, v7.4s;
- rev64 v6.4s, v6.4s;
- rev64 v5.4s, v5.4s;
- rev64 v4.4s, v4.4s;
- rev64 v3.4s, v3.4s;
- rev64 v2.4s, v2.4s;
- rev64 v1.4s, v1.4s;
- rev64 v0.4s, v0.4s;
- ext v7.16b, v7.16b, v7.16b, #8;
- ext v6.16b, v6.16b, v6.16b, #8;
- ext v5.16b, v5.16b, v5.16b, #8;
- ext v4.16b, v4.16b, v4.16b, #8;
- ext v3.16b, v3.16b, v3.16b, #8;
- ext v2.16b, v2.16b, v2.16b, #8;
- ext v1.16b, v1.16b, v1.16b, #8;
- ext v0.16b, v0.16b, v0.16b, #8;
- st1 {v7.16b}, [x2], #16;
- st1 {v6.16b}, [x2], #16;
- st1 {v5.16b}, [x2], #16;
- st1 {v4.16b}, [x2], #16;
- st1 {v3.16b}, [x2], #16;
- st1 {v2.16b}, [x2], #16;
- st1 {v1.16b}, [x2], #16;
- st1 {v0.16b}, [x2];
+
+ tbl v16.16b, {v7.16b}, v24.16b
+ tbl v17.16b, {v6.16b}, v24.16b
+ tbl v18.16b, {v5.16b}, v24.16b
+ tbl v19.16b, {v4.16b}, v24.16b
+ tbl v20.16b, {v3.16b}, v24.16b
+ tbl v21.16b, {v2.16b}, v24.16b
+ tbl v22.16b, {v1.16b}, v24.16b
+ tbl v23.16b, {v0.16b}, v24.16b
+
+ st1 {v16.16b-v19.16b}, [x2], #64
+ st1 {v20.16b-v23.16b}, [x2]

ret;
SYM_FUNC_END(sm4_ce_expand_key)
@@ -578,3 +569,10 @@ SYM_FUNC_START(sm4_ce_ctr_enc)

ret
SYM_FUNC_END(sm4_ce_ctr_enc)
+
+
+ .section ".rodata", "a"
+ .align 4
+.Lbswap128_mask:
+ .byte 0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b
+ .byte 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index e56e81b1f35f..ff2d8442d473 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -44,8 +44,10 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;

+ kernel_neon_begin();
sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
+ kernel_neon_end();
return 0;
}

--
2.24.3 (Apple Git-128)