[PATCH v6 09/23] drivers: crypto: meson: process more than MAXDESCS descriptors

From: Alexey Romanov
Date: Tue Mar 26 2024 - 11:35:05 EST


1. The old alhorithm was not designed to process a large
amount of memory, and therefore gave incorrect results.

2. Not all Amlogic SoC's use 3 KEY/IV descriptors.
Add keyiv descriptors count parameter to platform data.

Signed-off-by: Alexey Romanov <avromanov@xxxxxxxxxxxxxxxxx>
---
drivers/crypto/amlogic/amlogic-gxl-cipher.c | 445 ++++++++++++--------
drivers/crypto/amlogic/amlogic-gxl-core.c | 1 +
drivers/crypto/amlogic/amlogic-gxl.h | 2 +
3 files changed, 283 insertions(+), 165 deletions(-)

diff --git a/drivers/crypto/amlogic/amlogic-gxl-cipher.c b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
index e2c92505ad6b..075be767ad06 100644
--- a/drivers/crypto/amlogic/amlogic-gxl-cipher.c
+++ b/drivers/crypto/amlogic/amlogic-gxl-cipher.c
@@ -17,35 +17,41 @@
#include <crypto/internal/skcipher.h>
#include "amlogic-gxl.h"

-static bool meson_cipher_need_fallback(struct skcipher_request *areq)
+static bool meson_cipher_need_fallback_sg(struct skcipher_request *areq,
+ struct scatterlist *sg)
{
- struct scatterlist *src_sg = areq->src;
- struct scatterlist *dst_sg = areq->dst;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+ unsigned int blocksize = crypto_skcipher_blocksize(tfm);
+ unsigned int cryptlen = areq->cryptlen;
+
+ while (cryptlen) {
+ unsigned int len = min(cryptlen, sg->length);
+
+ if (!IS_ALIGNED(sg->offset, sizeof(u32)))
+ return true;
+ if (len % blocksize != 0)
+ return true;
+
+ cryptlen -= len;
+ sg = sg_next(sg);
+ }
+
+ return false;
+}

+static bool meson_cipher_need_fallback(struct skcipher_request *areq)
+{
if (areq->cryptlen == 0)
return true;

- if (sg_nents(src_sg) != sg_nents(dst_sg))
+ if (meson_cipher_need_fallback_sg(areq, areq->src))
return true;

- /* KEY/IV descriptors use 3 desc */
- if (sg_nents(src_sg) > MAXDESC - 3 || sg_nents(dst_sg) > MAXDESC - 3)
- return true;
+ if (areq->dst == areq->src)
+ return false;

- while (src_sg && dst_sg) {
- if ((src_sg->length % 16) != 0)
- return true;
- if ((dst_sg->length % 16) != 0)
- return true;
- if (src_sg->length != dst_sg->length)
- return true;
- if (!IS_ALIGNED(src_sg->offset, sizeof(u32)))
- return true;
- if (!IS_ALIGNED(dst_sg->offset, sizeof(u32)))
- return true;
- src_sg = sg_next(src_sg);
- dst_sg = sg_next(dst_sg);
- }
+ if (meson_cipher_need_fallback_sg(areq, areq->dst))
+ return true;

return false;
}
@@ -76,184 +82,293 @@ static int meson_cipher_do_fallback(struct skcipher_request *areq)
return err;
}

-static int meson_cipher(struct skcipher_request *areq)
+struct cipher_ctx {
+ struct {
+ dma_addr_t addr;
+ unsigned int len;
+ } keyiv;
+
+ struct skcipher_request *areq;
+ struct scatterlist *src_sg;
+ struct scatterlist *dst_sg;
+ void *bkeyiv;
+
+ unsigned int src_offset;
+ unsigned int dst_offset;
+ unsigned int cryptlen;
+ unsigned int tloffset;
+};
+
+static int meson_map_scatterlist(struct skcipher_request *areq, struct meson_dev *mc)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
- struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
- struct meson_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
- struct meson_dev *mc = op->mc;
- struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
- struct meson_alg_template *algt;
- int flow = rctx->flow;
- unsigned int todo, eat, len;
- struct scatterlist *src_sg = areq->src;
- struct scatterlist *dst_sg = areq->dst;
- struct meson_desc *desc;
int nr_sgs, nr_sgd;
- int i, err = 0;
- unsigned int keyivlen, ivsize, offset, tloffset;
- dma_addr_t phykeyiv;
- void *backup_iv = NULL, *bkeyiv;
- u32 v;
-
- algt = container_of(alg, struct meson_alg_template, alg.skcipher.base);
-
- dev_dbg(mc->dev, "%s %s %u %x IV(%u) key=%u flow=%d\n", __func__,
- crypto_tfm_alg_name(areq->base.tfm),
- areq->cryptlen,
- rctx->op_dir, crypto_skcipher_ivsize(tfm),
- op->keylen, flow);
-
-#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
- algt->stat_req++;
- mc->chanlist[flow].stat_req++;
-#endif
-
- /*
- * The hardware expect a list of meson_desc structures.
- * The 2 first structures store key
- * The third stores IV
- */
- bkeyiv = kzalloc(48, GFP_KERNEL | GFP_DMA);
- if (!bkeyiv)
- return -ENOMEM;
-
- memcpy(bkeyiv, op->key, op->keylen);
- keyivlen = op->keylen;
-
- ivsize = crypto_skcipher_ivsize(tfm);
- if (areq->iv && ivsize > 0) {
- if (ivsize > areq->cryptlen) {
- dev_err(mc->dev, "invalid ivsize=%d vs len=%d\n", ivsize, areq->cryptlen);
- err = -EINVAL;
- goto theend;
- }
- memcpy(bkeyiv + 32, areq->iv, ivsize);
- keyivlen = 48;
- if (rctx->op_dir == MESON_DECRYPT) {
- backup_iv = kzalloc(ivsize, GFP_KERNEL);
- if (!backup_iv) {
- err = -ENOMEM;
- goto theend;
- }
- offset = areq->cryptlen - ivsize;
- scatterwalk_map_and_copy(backup_iv, areq->src, offset,
- ivsize, 0);
- }
- }
- if (keyivlen == AES_KEYSIZE_192)
- keyivlen = AES_MAX_KEY_SIZE;
-
- phykeyiv = dma_map_single(mc->dev, bkeyiv, keyivlen,
- DMA_TO_DEVICE);
- err = dma_mapping_error(mc->dev, phykeyiv);
- if (err) {
- dev_err(mc->dev, "Cannot DMA MAP KEY IV\n");
- goto theend;
- }
-
- tloffset = 0;
- eat = 0;
- i = 0;
- while (keyivlen > eat) {
- desc = &mc->chanlist[flow].tl[tloffset];
- memset(desc, 0, sizeof(struct meson_desc));
- todo = min(keyivlen - eat, 16u);
- desc->t_src = cpu_to_le32(phykeyiv + i * 16);
- desc->t_dst = cpu_to_le32(i * 16);
- v = DESC_MODE_KEY | DESC_OWN | 16;
- desc->t_status = cpu_to_le32(v);
-
- eat += todo;
- i++;
- tloffset++;
- }

if (areq->src == areq->dst) {
nr_sgs = dma_map_sg(mc->dev, areq->src, sg_nents(areq->src),
DMA_BIDIRECTIONAL);
if (!nr_sgs) {
dev_err(mc->dev, "Invalid SG count %d\n", nr_sgs);
- err = -EINVAL;
- goto theend;
+ return -EINVAL;
}
- nr_sgd = nr_sgs;
} else {
nr_sgs = dma_map_sg(mc->dev, areq->src, sg_nents(areq->src),
DMA_TO_DEVICE);
- if (!nr_sgs || nr_sgs > MAXDESC - 3) {
+ if (!nr_sgs) {
dev_err(mc->dev, "Invalid SG count %d\n", nr_sgs);
- err = -EINVAL;
- goto theend;
+ return -EINVAL;
}
+
nr_sgd = dma_map_sg(mc->dev, areq->dst, sg_nents(areq->dst),
DMA_FROM_DEVICE);
- if (!nr_sgd || nr_sgd > MAXDESC - 3) {
+ if (!nr_sgd) {
+ dma_unmap_sg(mc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
dev_err(mc->dev, "Invalid SG count %d\n", nr_sgd);
- err = -EINVAL;
- goto theend;
+ return -EINVAL;
}
}

- src_sg = areq->src;
- dst_sg = areq->dst;
- len = areq->cryptlen;
- while (src_sg) {
- desc = &mc->chanlist[flow].tl[tloffset];
- memset(desc, 0, sizeof(struct meson_desc));
-
- desc->t_src = cpu_to_le32(sg_dma_address(src_sg));
- desc->t_dst = cpu_to_le32(sg_dma_address(dst_sg));
- todo = min(len, sg_dma_len(src_sg));
- v = op->keymode | DESC_OWN | todo | algt->blockmode;
- if (rctx->op_dir)
- v |= DESC_ENCRYPTION;
- len -= todo;
-
- if (!sg_next(src_sg))
- v |= DESC_LAST;
- desc->t_status = cpu_to_le32(v);
- tloffset++;
- src_sg = sg_next(src_sg);
- dst_sg = sg_next(dst_sg);
+ return 0;
+}
+
+static void meson_unmap_scatterlist(struct skcipher_request *areq, struct meson_dev *mc)
+{
+ if (areq->src == areq->dst) {
+ dma_unmap_sg(mc->dev, areq->src, sg_nents(areq->src), DMA_BIDIRECTIONAL);
+ } else {
+ dma_unmap_sg(mc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
+ dma_unmap_sg(mc->dev, areq->dst, sg_nents(areq->dst), DMA_FROM_DEVICE);
}
+}

- reinit_completion(&mc->chanlist[flow].complete);
- meson_dma_start(mc, flow);
+static void meson_setup_keyiv_descs(struct cipher_ctx *ctx)
+{
+ struct meson_cipher_req_ctx *rctx = skcipher_request_ctx(ctx->areq);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(ctx->areq);
+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+ struct meson_alg_template *algt = container_of(alg,
+ struct meson_alg_template, alg.skcipher.base);
+ struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+ struct meson_dev *mc = op->mc;
+ unsigned int ivsize = crypto_skcipher_ivsize(tfm);
+ unsigned int blockmode = algt->blockmode;
+ int i;
+
+ if (ctx->tloffset)
+ return;
+
+ if (blockmode == DESC_OPMODE_CBC) {
+ memcpy(ctx->bkeyiv + AES_MAX_KEY_SIZE, ctx->areq->iv, ivsize);
+ dma_sync_single_for_device(mc->dev, ctx->keyiv.addr,
+ ctx->keyiv.len, DMA_TO_DEVICE);
+ }
+
+ for (i = 0; i < mc->pdata->setup_desc_cnt; i++) {
+ struct meson_desc *desc =
+ &mc->chanlist[rctx->flow].tl[ctx->tloffset];
+ int offset = i * 16;
+
+ desc->t_src = cpu_to_le32(ctx->keyiv.addr + offset);
+ desc->t_dst = cpu_to_le32(offset);
+ desc->t_status = cpu_to_le32(DESC_OWN | DESC_MODE_KEY | ctx->keyiv.len);
+
+ ctx->tloffset++;
+ }
+}

- err = wait_for_completion_interruptible_timeout(&mc->chanlist[flow].complete,
+static bool meson_setup_data_descs(struct cipher_ctx *ctx)
+{
+ struct meson_cipher_req_ctx *rctx = skcipher_request_ctx(ctx->areq);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(ctx->areq);
+ struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+ struct meson_alg_template *algt = container_of(alg,
+ struct meson_alg_template,
+ alg.skcipher.base);
+ struct meson_dev *mc = op->mc;
+ struct meson_desc *desc = &mc->chanlist[rctx->flow].tl[ctx->tloffset];
+ unsigned int blocksize = crypto_skcipher_blocksize(tfm);
+ unsigned int blockmode = algt->blockmode;
+ unsigned int maxlen = rounddown(DESC_MAXLEN, blocksize);
+ unsigned int todo;
+ u32 v;
+
+ ctx->tloffset++;
+
+ todo = min(ctx->cryptlen, maxlen);
+ todo = min(todo, ctx->cryptlen);
+ todo = min(todo, sg_dma_len(ctx->src_sg) - ctx->src_offset);
+ todo = min(todo, sg_dma_len(ctx->dst_sg) - ctx->dst_offset);
+
+ desc->t_src = cpu_to_le32(sg_dma_address(ctx->src_sg) + ctx->src_offset);
+ desc->t_dst = cpu_to_le32(sg_dma_address(ctx->dst_sg) + ctx->dst_offset);
+
+ ctx->cryptlen -= todo;
+ ctx->src_offset += todo;
+ ctx->dst_offset += todo;
+
+ v = DESC_OWN | blockmode | op->keymode | todo;
+ if (rctx->op_dir == MESON_ENCRYPT)
+ v |= DESC_ENCRYPTION;
+
+ if (!ctx->cryptlen || ctx->tloffset == MAXDESC)
+ v |= DESC_LAST;
+
+ desc->t_status = cpu_to_le32(v);
+
+ return v & DESC_LAST;
+}
+
+static int meson_kick_hardware(struct cipher_ctx *ctx)
+{
+ struct meson_cipher_req_ctx *rctx = skcipher_request_ctx(ctx->areq);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(ctx->areq);
+ struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+ struct meson_alg_template *algt = container_of(alg,
+ struct meson_alg_template,
+ alg.skcipher.base);
+ struct meson_dev *mc = op->mc;
+ unsigned int ivsize = crypto_skcipher_ivsize(tfm);
+ unsigned int blockmode = algt->blockmode;
+ enum dma_data_direction new_iv_dir;
+ dma_addr_t new_iv_phys;
+ void *new_iv;
+ int err;
+
+ if (blockmode == DESC_OPMODE_CBC) {
+ struct scatterlist *sg;
+ unsigned int offset;
+
+ if (rctx->op_dir == MESON_ENCRYPT) {
+ sg = ctx->dst_sg;
+ offset = ctx->dst_offset;
+ new_iv_dir = DMA_FROM_DEVICE;
+ } else {
+ sg = ctx->src_sg;
+ offset = ctx->src_offset;
+ new_iv_dir = DMA_TO_DEVICE;
+ }
+
+ if (ctx->areq->src == ctx->areq->dst)
+ new_iv_dir = DMA_BIDIRECTIONAL;
+
+ offset -= ivsize;
+ new_iv = sg_virt(sg) + offset;
+ new_iv_phys = sg_dma_address(sg) + offset;
+ }
+
+ if (blockmode == DESC_OPMODE_CBC &&
+ rctx->op_dir == MESON_DECRYPT) {
+ dma_sync_single_for_cpu(mc->dev, new_iv_phys,
+ ivsize, new_iv_dir);
+ memcpy(ctx->areq->iv, new_iv, ivsize);
+ }
+
+ reinit_completion(&mc->chanlist[rctx->flow].complete);
+ meson_dma_start(mc, rctx->flow);
+ err = wait_for_completion_interruptible_timeout(&mc->chanlist[rctx->flow].complete,
msecs_to_jiffies(500));
if (err == 0) {
- dev_err(mc->dev, "DMA timeout for flow %d\n", flow);
- err = -EINVAL;
+ dev_err(mc->dev, "DMA timeout for flow %d\n", rctx->flow);
+ return -EINVAL;
} else if (err < 0) {
dev_err(mc->dev, "Waiting for DMA completion is failed (%d)\n", err);
- } else {
- /* No error */
- err = 0;
+ return err;
}

- dma_unmap_single(mc->dev, phykeyiv, keyivlen, DMA_TO_DEVICE);
+ if (blockmode == DESC_OPMODE_CBC &&
+ rctx->op_dir == MESON_ENCRYPT) {
+ dma_sync_single_for_cpu(mc->dev, new_iv_phys,
+ ivsize, new_iv_dir);
+ memcpy(ctx->areq->iv, new_iv, ivsize);
+ }

- if (areq->src == areq->dst) {
- dma_unmap_sg(mc->dev, areq->src, sg_nents(areq->src), DMA_BIDIRECTIONAL);
- } else {
- dma_unmap_sg(mc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
- dma_unmap_sg(mc->dev, areq->dst, sg_nents(areq->dst), DMA_FROM_DEVICE);
+ ctx->tloffset = 0;
+
+ return 0;
+}
+
+static int meson_cipher(struct skcipher_request *areq)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+ struct meson_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+ struct meson_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+ struct meson_dev *mc = op->mc;
+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+ struct meson_alg_template *algt;
+ struct cipher_ctx ctx = {
+ .areq = areq,
+ .src_offset = 0,
+ .dst_offset = 0,
+ .src_sg = areq->src,
+ .dst_sg = areq->dst,
+ .cryptlen = areq->cryptlen,
+ };
+ int err;
+
+ dev_dbg(mc->dev, "%s %s %u %x IV(%u) key=%u ctx.flow=%d\n", __func__,
+ crypto_tfm_alg_name(areq->base.tfm),
+ areq->cryptlen,
+ rctx->op_dir, crypto_skcipher_ivsize(tfm),
+ op->keylen, rctx->flow);
+
+ algt = container_of(alg, struct meson_alg_template, alg.skcipher.base);
+
+#ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
+ algt->stat_req++;
+ mc->chanlist[rctx->flow].stat_req++;
+#endif
+
+ ctx.bkeyiv = kzalloc(48, GFP_KERNEL | GFP_DMA);
+ if (!ctx.bkeyiv)
+ return -ENOMEM;
+
+ memcpy(ctx.bkeyiv, op->key, op->keylen);
+ ctx.keyiv.len = op->keylen;
+ if (ctx.keyiv.len == AES_KEYSIZE_192)
+ ctx.keyiv.len = AES_MAX_KEY_SIZE;
+
+ ctx.keyiv.addr = dma_map_single(mc->dev, ctx.bkeyiv, ctx.keyiv.len,
+ DMA_TO_DEVICE);
+ err = dma_mapping_error(mc->dev, ctx.keyiv.addr);
+ if (err) {
+ dev_err(mc->dev, "Cannot DMA MAP KEY IV\n");
+ goto free_keyiv;
}

- if (areq->iv && ivsize > 0) {
- if (rctx->op_dir == MESON_DECRYPT) {
- memcpy(areq->iv, backup_iv, ivsize);
- } else {
- scatterwalk_map_and_copy(areq->iv, areq->dst,
- areq->cryptlen - ivsize,
- ivsize, 0);
+ err = meson_map_scatterlist(areq, mc);
+ if (err)
+ goto unmap_keyiv;
+
+ ctx.tloffset = 0;
+
+ while (ctx.cryptlen) {
+ meson_setup_keyiv_descs(&ctx);
+
+ if (meson_setup_data_descs(&ctx)) {
+ err = meson_kick_hardware(&ctx);
+ if (err)
+ break;
+ }
+
+ if (ctx.src_offset == sg_dma_len(ctx.src_sg)) {
+ ctx.src_offset = 0;
+ ctx.src_sg = sg_next(ctx.src_sg);
+ }
+
+ if (ctx.dst_offset == sg_dma_len(ctx.dst_sg)) {
+ ctx.dst_offset = 0;
+ ctx.dst_sg = sg_next(ctx.dst_sg);
}
}
-theend:
- kfree_sensitive(bkeyiv);
- kfree_sensitive(backup_iv);
+
+ meson_unmap_scatterlist(areq, mc);
+
+unmap_keyiv:
+ dma_unmap_single(mc->dev, ctx.keyiv.addr, ctx.keyiv.len, DMA_TO_DEVICE);
+
+free_keyiv:
+ kfree_sensitive(ctx.bkeyiv);

return err;
}
diff --git a/drivers/crypto/amlogic/amlogic-gxl-core.c b/drivers/crypto/amlogic/amlogic-gxl-core.c
index 98e63e67aa6e..5fea95e9876b 100644
--- a/drivers/crypto/amlogic/amlogic-gxl-core.c
+++ b/drivers/crypto/amlogic/amlogic-gxl-core.c
@@ -258,6 +258,7 @@ static void meson_crypto_remove(struct platform_device *pdev)
static const struct meson_pdata meson_gxl_pdata = {
.descs_reg = 0x0,
.status_reg = 0x4,
+ .setup_desc_cnt = 3,
};

static const struct of_device_id meson_crypto_of_match_table[] = {
diff --git a/drivers/crypto/amlogic/amlogic-gxl.h b/drivers/crypto/amlogic/amlogic-gxl.h
index 1ab3462dea42..f3455babb52a 100644
--- a/drivers/crypto/amlogic/amlogic-gxl.h
+++ b/drivers/crypto/amlogic/amlogic-gxl.h
@@ -82,10 +82,12 @@ struct meson_flow {
* struct meson_pdata - SoC series dependent data.
* @reg_descs: offset to descriptors register
* @reg_status: offset to status register
+ * @setup_desc_cnt: number of setup descriptor to configure.
*/
struct meson_pdata {
u32 descs_reg;
u32 status_reg;
+ u32 setup_desc_cnt;
};

/*
--
2.34.1