Re: [Patch v2 6/7] crypto: qce: common: Add support for AEAD algorithms

From: Bjorn Andersson
Date: Sun Apr 18 2021 - 23:17:11 EST


On Sat 17 Apr 08:25 CDT 2021, Thara Gopinath wrote:

> Add register programming sequence for enabling AEAD
> algorithms on the Qualcomm crypto engine.
>
> Signed-off-by: Thara Gopinath <thara.gopinath@xxxxxxxxxx>
> ---
>
> v1->v2:
> - Minor fixes like removing not needed initializing of variables
> and using bool values in lieu of 0 and 1 as pointed out by Bjorn.
> - Introduced qce_be32_to_cpu_array which converts the u8 string in big
> endian order to array of u32 and returns back total number of words,
> as per Bjorn's review comments. Presently this function is used only by
> qce_setup_regs_aead to format keys, iv and nonce. cipher and hash
> algorithms can be made to use this function as a separate clean up patch.

Thanks for reworking the patch, Thara. I think it looks much more
reasonable now; just a few small questions/nits below.

>
> drivers/crypto/qce/common.c | 164 +++++++++++++++++++++++++++++++++++-
> 1 file changed, 162 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c
> index 7b3d6caec1b2..ffbf866842a3 100644
> --- a/drivers/crypto/qce/common.c
> +++ b/drivers/crypto/qce/common.c
> @@ -15,6 +15,16 @@
> #include "core.h"
> #include "regs-v5.h"
> #include "sha.h"
> +#include "aead.h"
> +
> +static const u32 std_iv_sha1[SHA256_DIGEST_SIZE / sizeof(u32)] = {
> + SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4, 0, 0, 0
> +};
> +
> +static const u32 std_iv_sha256[SHA256_DIGEST_SIZE / sizeof(u32)] = {
> + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
> + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7
> +};
>
> static inline u32 qce_read(struct qce_device *qce, u32 offset)
> {
> @@ -76,6 +86,21 @@ void qce_cpu_to_be32p_array(__be32 *dst, const u8 *src, unsigned int len)
> }
> }
>
> +static unsigned int qce_be32_to_cpu_array(u32 *dst, const u8 *src, unsigned int len)
> +{
> + __be32 *d = (__be32 *)dst;
> + const u8 *s = src;
> + unsigned int n;
> +
> + n = len / sizeof(u32);
> + for (; n > 0; n--) {
> + *d = cpu_to_be32p((const __u32 *)s);

The output is CPU-endian, so this should be be32_to_cpup().

That also means that 'd' can be a plain u32 *, and you don't have to play
tricks and cast dst to a __be32 *.

> + s += sizeof(u32);
> + d++;
> + }
> + return DIV_ROUND_UP(len, sizeof(u32));
> +}
> +
> static void qce_setup_config(struct qce_device *qce)
> {
> u32 config;
> @@ -96,7 +121,7 @@ static inline void qce_crypto_go(struct qce_device *qce, bool result_dump)
> qce_write(qce, REG_GOPROC, BIT(GO_SHIFT));
> }
>
> -#ifdef CONFIG_CRYPTO_DEV_QCE_SHA
> +#if defined(CONFIG_CRYPTO_DEV_QCE_SHA) || defined(CONFIG_CRYPTO_DEV_QCE_AEAD)
> static u32 qce_auth_cfg(unsigned long flags, u32 key_size, u32 auth_size)
> {
> u32 cfg = 0;
> @@ -139,7 +164,9 @@ static u32 qce_auth_cfg(unsigned long flags, u32 key_size, u32 auth_size)
>
> return cfg;
> }
> +#endif
>
> +#ifdef CONFIG_CRYPTO_DEV_QCE_SHA
> static int qce_setup_regs_ahash(struct crypto_async_request *async_req)
> {
> struct ahash_request *req = ahash_request_cast(async_req);
> @@ -225,7 +252,7 @@ static int qce_setup_regs_ahash(struct crypto_async_request *async_req)
> }
> #endif
>
> -#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER
> +#if defined(CONFIG_CRYPTO_DEV_QCE_SKCIPHER) || defined(CONFIG_CRYPTO_DEV_QCE_AEAD)
> static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size)
> {
> u32 cfg = 0;
> @@ -271,7 +298,9 @@ static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size)
>
> return cfg;
> }
> +#endif
>
> +#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER
> static void qce_xts_swapiv(__be32 *dst, const u8 *src, unsigned int ivsize)
> {
> u8 swap[QCE_AES_IV_LENGTH];
> @@ -386,6 +415,133 @@ static int qce_setup_regs_skcipher(struct crypto_async_request *async_req)
> }
> #endif
>
> +#ifdef CONFIG_CRYPTO_DEV_QCE_AEAD
> +static int qce_setup_regs_aead(struct crypto_async_request *async_req)
> +{
> + struct aead_request *req = aead_request_cast(async_req);
> + struct qce_aead_reqctx *rctx = aead_request_ctx(req);
> + struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm);
> + struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
> + struct qce_device *qce = tmpl->qce;
> + u32 enckey[QCE_MAX_CIPHER_KEY_SIZE / sizeof(u32)] = {0};
> + u32 enciv[QCE_MAX_IV_SIZE / sizeof(u32)] = {0};
> + u32 authkey[QCE_SHA_HMAC_KEY_SIZE / sizeof(u32)] = {0};
> + u32 authiv[SHA256_DIGEST_SIZE / sizeof(u32)] = {0};
> + u32 authnonce[QCE_MAX_NONCE / sizeof(u32)] = {0};
> + unsigned int enc_keylen = ctx->enc_keylen;
> + unsigned int auth_keylen = ctx->auth_keylen;
> + unsigned int enc_ivsize = rctx->ivsize;
> + unsigned int auth_ivsize;
> + unsigned int enckey_words, enciv_words;
> + unsigned int authkey_words, authiv_words, authnonce_words;
> + unsigned long flags = rctx->flags;
> + u32 encr_cfg, auth_cfg, config, totallen;
> + u32 *iv_last_word;
> +
> + qce_setup_config(qce);
> +
> + /* Write encryption key */
> + enckey_words = qce_be32_to_cpu_array(enckey, ctx->enc_key, enc_keylen);
> + qce_write_array(qce, REG_ENCR_KEY0, (u32 *)enckey, enckey_words);

Do you really need this (u32 *) cast now?

PS. Returning the number of words turned out much better, looks good.

> +
> + /* Write encryption iv */
> + enciv_words = qce_be32_to_cpu_array(enciv, rctx->iv, enc_ivsize);
> + qce_write_array(qce, REG_CNTR0_IV0, (u32 *)enciv, enciv_words);
> +
> + if (IS_CCM(rctx->flags)) {
> + iv_last_word = (u32 *)&enciv[enciv_words - 1];

iv_last_word can be a u32 (not a pointer) and this would simply be:
iv_last_word = enciv[enciv_words - 1];

> + qce_write(qce, REG_CNTR3_IV3, (*iv_last_word) + 1);
> + qce_write_array(qce, REG_ENCR_CCM_INT_CNTR0, (u32 *)enciv, enciv_words);
> + qce_write(qce, REG_CNTR_MASK, ~0);
> + qce_write(qce, REG_CNTR_MASK0, ~0);
> + qce_write(qce, REG_CNTR_MASK1, ~0);
> + qce_write(qce, REG_CNTR_MASK2, ~0);
> + }
> +
> + /* Clear authentication IV and KEY registers of previous values */
> + qce_clear_array(qce, REG_AUTH_IV0, 16);
> + qce_clear_array(qce, REG_AUTH_KEY0, 16);
> +
> + /* Clear byte count */
> + qce_clear_array(qce, REG_AUTH_BYTECNT0, 4);
> +
> + /* Write authentication key */
> + authkey_words = qce_be32_to_cpu_array(authkey, ctx->auth_key, auth_keylen);
> + qce_write_array(qce, REG_AUTH_KEY0, (u32 *)authkey, authkey_words);
> +
> + /* Write initial authentication IV only for HMAC algorithms */
> + if (IS_SHA_HMAC(rctx->flags)) {
> + /* Write default authentication iv */
> + if (IS_SHA1_HMAC(rctx->flags)) {
> + auth_ivsize = SHA1_DIGEST_SIZE;
> + memcpy(authiv, std_iv_sha1, auth_ivsize);
> + } else if (IS_SHA256_HMAC(rctx->flags)) {
> + auth_ivsize = SHA256_DIGEST_SIZE;
> + memcpy(authiv, std_iv_sha256, auth_ivsize);
> + }
> + authiv_words = auth_ivsize / sizeof(u32);
> + qce_write_array(qce, REG_AUTH_IV0, (u32 *)authiv, authiv_words);
> + }
> +
> + /* Write nonce for CCM algorithms */
> + if (IS_CCM(rctx->flags)) {

Now I see what we were discussing in the last iteration... can't this
line just be:
} else {

or perhaps:
} else if (IS_CCM(rctx->flags)) {

I'm not saying that it's necessarily better, but if they are mutually
exclusive then it should be fine - and "self documenting".

Regards,
Bjorn