Re: [PATCH] crypto: Zhaoxin: Hardware Engine Driver for SHA1/256/384/512

From: Dave Hansen
Date: Wed Aug 02 2023 - 10:28:16 EST


This code looks pretty rough.

> +static int zhaoxin_sha1_update(struct shash_desc *desc,
> + const u8 *data, unsigned int len)
> +{
> + struct sha1_state *sctx = shash_desc_ctx(desc);
> + unsigned int partial, done;
> + const u8 *src;
> + /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
> + u8 buf[128 + ZHAOXIN_SHA_ALIGNMENT - STACK_ALIGN] __attribute__
> + ((aligned(STACK_ALIGN)));
> + u8 *dst = PTR_ALIGN(&buf[0], ZHAOXIN_SHA_ALIGNMENT);

All of the different alignments here are pretty dazzling.

> + partial = sctx->count & 0x3f;

"0x3f" is a random magic number.

> + sctx->count += len;
> + done = 0;
> + src = data;
> + memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);
> +
> + if ((partial + len) >= SHA1_BLOCK_SIZE) {
> +
> + /* Append the bytes in state's buffer to a block to handle */
> + if (partial) {
> + done = -partial;
> + memcpy(sctx->buffer + partial, data,
> + done + SHA1_BLOCK_SIZE);
> + src = sctx->buffer;
> + asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
> + : "+S"(src), "+D"(dst)
> + : "a"((long)-1), "c"(1UL));

Please look around the codebase for examples of how to do this. We
usually try to use real instruction mnemonics when binutils supports them,
and we also avoid repeatedly open-coding the ".byte ..." sequence.

> + done += SHA1_BLOCK_SIZE;
> + src = data + done;
> + }
> +
> + /* Process the left bytes from the input data */
> + if (len - done >= SHA1_BLOCK_SIZE) {
> + asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
> + : "+S"(src), "+D"(dst)
> + : "a"((long)-1),
> + "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
> + done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
> + src = data + done;
> + }
> + partial = 0;
> + }
> + memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);

What's the purpose of the cast?

> + memcpy(sctx->buffer + partial, src, len - done);
> +
> + return 0;
> +}
> +
> +static int zhaoxin_sha1_final(struct shash_desc *desc, u8 *out)
> +{
> + struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);

What's the purpose of *this* cast?

> + unsigned int partial, padlen;
> + __be64 bits;
> + static const u8 padding[64] = { 0x80, };
> +
> + bits = cpu_to_be64(state->count << 3);
> +
> + /* Pad out to 56 mod 64 */
> + partial = state->count & 0x3f;
> + padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
> + zhaoxin_sha1_update(desc, padding, padlen);
> +
> + /* Append length field bytes */
> + zhaoxin_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
> +
> + /* Swap to output */
> + zhaoxin_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);
> +
> + return 0;
> +}
> +
> +static int zhaoxin_sha256_init(struct shash_desc *desc)
> +{
> + struct sha256_state *sctx = shash_desc_ctx(desc);
> +
> + *sctx = (struct sha256_state){
> + .state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
> + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7},
> + };
> +
> + return 0;
> +}
> +
> +static int zhaoxin_sha256_update(struct shash_desc *desc, const u8 *data,
> + unsigned int len)
> +{
> + struct sha256_state *sctx = shash_desc_ctx(desc);
> + unsigned int partial, done;
> + const u8 *src;
> + /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
> + u8 buf[128 + ZHAOXIN_SHA_ALIGNMENT - STACK_ALIGN] __attribute__
> + ((aligned(STACK_ALIGN)));
> + u8 *dst = PTR_ALIGN(&buf[0], ZHAOXIN_SHA_ALIGNMENT);
> +
> + partial = sctx->count & 0x3f;
> + sctx->count += len;
> + done = 0;
> + src = data;
> + memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);

That looks familiar.

This patch needs some serious cleanup and refactoring. It seems to be
missing even the basics, like avoiding copy-and-pasted code. The
changelog is also quite sparse.

Could you spend some more time on this and give it another go, please?