[RFC PATCH] crypto: arc4: Implement a version optimized for memory usage
From: Christophe JAILLET
Date: Sun May 02 2021 - 15:29:58 EST
The S array does not need to be u32; u8 is enough.
On machines which have efficient unaligned access, use u8 to save some
memory.
So, provide a version optimized for memory usage in such a case.
Based on my testing, the size of arc4_ctx is:
u8 version: 264
u32 version: 1032
On my machine, the u8 version is about 5% faster.
It saves ~800 bytes of memory or stack, depending on how arc4_ctx is stored.
It is likely to be more cache friendly.
It has been tested on a Core i7-3770 with the following test program:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * Kernel-style fixed-width aliases for this user-space test program.
 * Real typedefs are used instead of object-like macros so the names obey
 * normal scoping rules and are visible to the compiler as types.
 */
typedef unsigned char u8;
typedef unsigned int u32;

/* <stdbool.h> is not included by this program; provide the one constant used. */
#define true 1
/* ARC4 state for the memory-optimized variant: S-box entries are one byte. */
struct arc4_ctx_8 {
	u8 S[256];	/* permutation table, set up by arc4_setkey_8() */
	u32 x;		/* stream index, saved between arc4_crypt_8() calls */
	u32 y;		/* stream index, saved between arc4_crypt_8() calls */
};
/* ARC4 state for the reference variant: S-box entries are stored as u32. */
struct arc4_ctx_32 {
	u32 S[256];	/* permutation table, set up by arc4_setkey_32() */
	u32 x;		/* stream index, saved between arc4_crypt_32() calls */
	u32 y;		/* stream index, saved between arc4_crypt_32() calls */
};
/* S-box element type used by the arc4_*_8 functions below (byte-wide variant). */
#define S_type u8
/*
 * arc4_setkey_8 - initialise a byte-wide ARC4 context from a key.
 * @ctx:     context to initialise (S-box and both stream indices are rewritten)
 * @in_key:  key bytes; in_key[0] is read even when @key_len is 0, so at
 *           least one byte must be readable
 * @key_len: number of key bytes; the key is reused cyclically over the
 *           256 mixing steps
 *
 * Always returns 0.
 */
int arc4_setkey_8(struct arc4_ctx_8 *ctx, const u8 *in_key, unsigned int key_len)
{
	int idx;
	int swap_pos = 0;
	int key_pos = 0;

	/* Stream indices start at x = 1, y = 0. */
	ctx->y = 0;
	ctx->x = 1;

	/* Start from the identity permutation. */
	for (idx = 0; idx < 256; idx++)
		ctx->S[idx] = idx;

	/* Key-dependent shuffle: swap S[idx] with S[swap_pos] at every step. */
	for (idx = 0; idx < 256; idx++) {
		S_type cur = ctx->S[idx];

		swap_pos = (swap_pos + in_key[key_pos] + cur) & 0xff;
		ctx->S[idx] = ctx->S[swap_pos];
		ctx->S[swap_pos] = cur;

		/* Wrap around to the first key byte once the key is exhausted. */
		key_pos++;
		if (key_pos >= key_len)
			key_pos = 0;
	}

	return 0;
}
/*
 * arc4_crypt_8 - XOR @len bytes of @in with the ARC4 keystream into @out.
 * @ctx: byte-wide ARC4 context; its S-box and x/y indices are updated
 * @out: destination buffer, @len bytes are written
 * @in:  source buffer, @len bytes are read
 * @len: number of bytes to process; 0 returns immediately without
 *       touching @ctx
 *
 * The S-box loads needed for the next byte are issued before the current
 * output byte is produced, so the statement order inside the loop matters.
 */
void arc4_crypt_8(struct arc4_ctx_8 *ctx, u8 *out, const u8 *in, unsigned int len)
{
	S_type *const sbox = ctx->S;
	S_type sx, sy, next_sx, next_sy;
	u32 i, j, next_j;

	if (!len)
		return;

	/* Prime the first step from the saved indices. */
	i = ctx->x;
	j = ctx->y;
	sx = sbox[i];
	j = (j + sx) & 0xff;
	sy = sbox[j];

	for (;;) {
		/* Swap S[i] and S[j]; sx is reused as the keystream lookup index. */
		sbox[j] = sx;
		sx = (sx + sy) & 0xff;
		sbox[i] = sy;

		/* Advance i and pre-load the values for the following byte. */
		i = (i + 1) & 0xff;
		next_sx = sbox[i];
		next_j = (j + next_sx) & 0xff;
		next_sy = sbox[next_j];

		*out++ = *in++ ^ sbox[sx];

		if (--len == 0)
			break;

		j = next_j;
		sx = next_sx;
		sy = next_sy;
	}

	/* i is already advanced; j is the value used for the last byte. */
	ctx->x = i;
	ctx->y = j;
}
/* Switch the S-box element type to u32 for the arc4_*_32 functions that follow. */
#undef S_type
#define S_type u32
/*
 * arc4_setkey_32 - initialise a u32-based ARC4 context from a key.
 * @ctx:     context to initialise (S-box and both stream indices are rewritten)
 * @in_key:  key bytes; in_key[0] is read even when @key_len is 0, so at
 *           least one byte must be readable
 * @key_len: number of key bytes; the key is reused cyclically over the
 *           256 mixing steps
 *
 * Always returns 0.
 */
int arc4_setkey_32(struct arc4_ctx_32 *ctx, const u8 *in_key, unsigned int key_len)
{
	int idx;
	int swap_pos = 0;
	int key_pos = 0;

	/* Stream indices start at x = 1, y = 0. */
	ctx->y = 0;
	ctx->x = 1;

	/* Start from the identity permutation. */
	for (idx = 0; idx < 256; idx++)
		ctx->S[idx] = idx;

	/* Key-dependent shuffle: swap S[idx] with S[swap_pos] at every step. */
	for (idx = 0; idx < 256; idx++) {
		S_type cur = ctx->S[idx];

		swap_pos = (swap_pos + in_key[key_pos] + cur) & 0xff;
		ctx->S[idx] = ctx->S[swap_pos];
		ctx->S[swap_pos] = cur;

		/* Wrap around to the first key byte once the key is exhausted. */
		key_pos++;
		if (key_pos >= key_len)
			key_pos = 0;
	}

	return 0;
}
/*
 * arc4_crypt_32 - XOR @len bytes of @in with the ARC4 keystream into @out.
 * @ctx: u32-based ARC4 context; its S-box and x/y indices are updated
 * @out: destination buffer, @len bytes are written
 * @in:  source buffer, @len bytes are read
 * @len: number of bytes to process; 0 returns immediately without
 *       touching @ctx
 *
 * The S-box loads needed for the next byte are issued before the current
 * output byte is produced, so the statement order inside the loop matters.
 */
void arc4_crypt_32(struct arc4_ctx_32 *ctx, u8 *out, const u8 *in, unsigned int len)
{
	S_type *const sbox = ctx->S;
	S_type sx, sy, next_sx, next_sy;
	u32 i, j, next_j;

	if (!len)
		return;

	/* Prime the first step from the saved indices. */
	i = ctx->x;
	j = ctx->y;
	sx = sbox[i];
	j = (j + sx) & 0xff;
	sy = sbox[j];

	for (;;) {
		/* Swap S[i] and S[j]; sx is reused as the keystream lookup index. */
		sbox[j] = sx;
		sx = (sx + sy) & 0xff;
		sbox[i] = sy;

		/* Advance i and pre-load the values for the following byte. */
		i = (i + 1) & 0xff;
		next_sx = sbox[i];
		next_j = (j + next_sx) & 0xff;
		next_sy = sbox[next_j];

		*out++ = *in++ ^ sbox[sx];

		if (--len == 0)
			break;

		j = next_j;
		sx = next_sx;
		sy = next_sy;
	}

	/* i is already advanced; j is the value used for the last byte. */
	ctx->x = i;
	ctx->y = j;
}
/* Fixed key and plaintext fed to both ARC4 variants. */
#define KEY "AZERTY"
/*
 * Named PLAINTEXT rather than `in` so the macro cannot clash with the
 * `in` parameter name used by the arc4_crypt_*() functions.
 */
#define PLAINTEXT "AZERTYUIOP_QSDFGHJKLM_WXCVBN"

/*
 * Encrypt the same plaintext with the u8-based and the u32-based ARC4
 * implementations, print the size of each context, and report whether the
 * two output buffers are identical ("Ok") or not ("Broken").
 *
 * Always returns 0.
 */
int main(void)
{
	/* String literals are char arrays; the ARC4 helpers take u8 pointers. */
	const u8 *key = (const u8 *)KEY;
	const u8 *plaintext = (const u8 *)PLAINTEXT;
	const unsigned int key_len = (unsigned int)strlen(KEY);
	const unsigned int text_len = (unsigned int)strlen(PLAINTEXT);
	struct arc4_ctx_8 ctx_8;
	struct arc4_ctx_32 ctx_32;
	/* Zero-filled so the bytes past text_len compare equal in memcmp(). */
	u8 out8[1024] = { 0 };
	u8 out32[1024] = { 0 };

	arc4_setkey_8(&ctx_8, key, key_len);
	arc4_crypt_8(&ctx_8, out8, plaintext, text_len);

	arc4_setkey_32(&ctx_32, key, key_len);
	arc4_crypt_32(&ctx_32, out32, plaintext, text_len);

	/* sizeof yields size_t, which must be printed with %zu. */
	printf("%zu vs %zu\n", sizeof(ctx_8), sizeof(ctx_32));

	if (memcmp(out8, out32, sizeof(out8)) == 0)
		printf("Ok\n");
	else
		printf("Broken\n");

	return 0;
}
Signed-off-by: Christophe JAILLET <christophe.jaillet@xxxxxxxxxx>
---
The idea came from code found in staging/rtl8712/
See at the top of:
https://elixir.bootlin.com/linux/v5.12/source/drivers/staging/rtl8712/rtl871x_security.c
More precisely, in an attempt to clean staging/rtl8712/, I triggered the
kernel test robot about some increasing stack usage:
https://lore.kernel.org/kernel-janitors/YHQUH+Nqc%2FzS14Tb@xxxxxxxxx/T/#m832a01a9d1517e7efc4f671ed46deae9993d6ae9
The above patch works for me, but should be taken as an RFC.
---
include/crypto/arc4.h | 8 +++++++-
lib/crypto/arc4.c | 8 ++++----
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/include/crypto/arc4.h b/include/crypto/arc4.h
index f3c22fe01704..39545ed486e2 100644
--- a/include/crypto/arc4.h
+++ b/include/crypto/arc4.h
@@ -12,8 +12,14 @@
#define ARC4_MAX_KEY_SIZE 256
#define ARC4_BLOCK_SIZE 1
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#define S_type u8
+#else
+#define S_type u32
+#endif
+
struct arc4_ctx {
- u32 S[256];
+ S_type S[256];
u32 x, y;
};
diff --git a/lib/crypto/arc4.c b/lib/crypto/arc4.c
index c2020f19c652..e0be0c2a08d9 100644
--- a/lib/crypto/arc4.c
+++ b/lib/crypto/arc4.c
@@ -21,7 +21,7 @@ int arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len)
ctx->S[i] = i;
for (i = 0; i < 256; i++) {
- u32 a = ctx->S[i];
+ S_type a = ctx->S[i];
j = (j + in_key[k] + a) & 0xff;
ctx->S[i] = ctx->S[j];
@@ -36,9 +36,9 @@ EXPORT_SYMBOL(arc4_setkey);
void arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len)
{
- u32 *const S = ctx->S;
- u32 x, y, a, b;
- u32 ty, ta, tb;
+ S_type *const S = ctx->S;
+ S_type a, b, ta, tb;
+ u32 x, y, ty;
if (len == 0)
return;
--
2.30.2