Re: [PATCH v3 3/3] spi: stm32-qspi: add automatic poll status feature

From: Patrice CHOTARD
Date: Tue May 18 2021 - 07:54:02 EST


Hi

On 5/18/21 1:39 PM, Boris Brezillon wrote:
> On Tue, 18 May 2021 11:39:51 +0200
> <patrice.chotard@xxxxxxxxxxx> wrote:
>
>> From: Patrice Chotard <patrice.chotard@xxxxxxxxxxx>
>>
>> STM32 QSPI is able to automatically poll a specified register inside the
>> memory and relieve the CPU from this task.
>>
>> As an example, when erasing a large memory area, we measured a CPU load
>> of 50%. This patch allows the same operation to be performed
>> with a CPU load of around 2%.
>>
>> Signed-off-by: Christophe Kerello <christophe.kerello@xxxxxxxxxxx>
>> Signed-off-by: Patrice Chotard <patrice.chotard@xxxxxxxxxxx>
>> ---
>> Changes in v3:
>> - Remove spi_mem_finalize_op() API added in v2.
>>
>> Changes in v2:
>> - The mask and match parameters of stm32_qspi_poll_status() are 2 bytes wide
>> - Make usage of new spi_mem_finalize_op() API in stm32_qspi_wait_poll_status()
>>
>> drivers/spi/spi-stm32-qspi.c | 83 ++++++++++++++++++++++++++++++++----
>> 1 file changed, 75 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
>> index 7e640ccc7e77..01168a859005 100644
>> --- a/drivers/spi/spi-stm32-qspi.c
>> +++ b/drivers/spi/spi-stm32-qspi.c
>> @@ -36,6 +36,7 @@
>> #define CR_FTIE BIT(18)
>> #define CR_SMIE BIT(19)
>> #define CR_TOIE BIT(20)
>> +#define CR_APMS BIT(22)
>> #define CR_PRESC_MASK GENMASK(31, 24)
>>
>> #define QSPI_DCR 0x04
>> @@ -53,6 +54,7 @@
>> #define QSPI_FCR 0x0c
>> #define FCR_CTEF BIT(0)
>> #define FCR_CTCF BIT(1)
>> +#define FCR_CSMF BIT(3)
>>
>> #define QSPI_DLR 0x10
>>
>> @@ -107,6 +109,7 @@ struct stm32_qspi {
>> u32 clk_rate;
>> struct stm32_qspi_flash flash[STM32_QSPI_MAX_NORCHIP];
>> struct completion data_completion;
>> + struct completion match_completion;
>> u32 fmode;
>>
>> struct dma_chan *dma_chtx;
>> @@ -115,6 +118,7 @@ struct stm32_qspi {
>>
>> u32 cr_reg;
>> u32 dcr_reg;
>> + unsigned long status_timeout;
>>
>> /*
>> * to protect device configuration, could be different between
>> @@ -128,11 +132,20 @@ static irqreturn_t stm32_qspi_irq(int irq, void *dev_id)
>> struct stm32_qspi *qspi = (struct stm32_qspi *)dev_id;
>> u32 cr, sr;
>>
>> + cr = readl_relaxed(qspi->io_base + QSPI_CR);
>> sr = readl_relaxed(qspi->io_base + QSPI_SR);
>>
>> + if (cr & CR_SMIE && sr & SR_SMF) {
>> + /* disable irq */
>> + cr &= ~CR_SMIE;
>> + writel_relaxed(cr, qspi->io_base + QSPI_CR);
>> + complete(&qspi->match_completion);
>> +
>> + return IRQ_HANDLED;
>> + }
>> +
>> if (sr & (SR_TEF | SR_TCF)) {
>> /* disable irq */
>> - cr = readl_relaxed(qspi->io_base + QSPI_CR);
>> cr &= ~CR_TCIE & ~CR_TEIE;
>> writel_relaxed(cr, qspi->io_base + QSPI_CR);
>> complete(&qspi->data_completion);
>> @@ -319,6 +332,24 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi,
>> return err;
>> }
>>
>> +static int stm32_qspi_wait_poll_status(struct stm32_qspi *qspi,
>> + const struct spi_mem_op *op)
>> +{
>> + u32 cr;
>> +
>> + reinit_completion(&qspi->match_completion);
>> + cr = readl_relaxed(qspi->io_base + QSPI_CR);
>> + writel_relaxed(cr | CR_SMIE, qspi->io_base + QSPI_CR);
>> +
>> + if (!wait_for_completion_timeout(&qspi->match_completion,
>> + msecs_to_jiffies(qspi->status_timeout)))
>> + return -ETIMEDOUT;
>> +
>> + writel_relaxed(FCR_CSMF, qspi->io_base + QSPI_FCR);
>> +
>> + return 0;
>> +}
>> +
>> static int stm32_qspi_get_mode(struct stm32_qspi *qspi, u8 buswidth)
>> {
>> if (buswidth == 4)
>> @@ -332,7 +363,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
>> struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
>> struct stm32_qspi_flash *flash = &qspi->flash[mem->spi->chip_select];
>> u32 ccr, cr;
>> - int timeout, err = 0;
>> + int timeout, err = 0, err_poll_status = 0;
>>
>> dev_dbg(qspi->dev, "cmd:%#x mode:%d.%d.%d.%d addr:%#llx len:%#x\n",
>> op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
>> @@ -378,6 +409,9 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
>> if (op->addr.nbytes && qspi->fmode != CCR_FMODE_MM)
>> writel_relaxed(op->addr.val, qspi->io_base + QSPI_AR);
>>
>> + if (qspi->fmode == CCR_FMODE_APM)
>> + err_poll_status = stm32_qspi_wait_poll_status(qspi, op);
>> +
>> err = stm32_qspi_tx(qspi, op);
>>
>> /*
>> @@ -387,7 +421,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
>> * byte of device (device size - fifo size). like device size is not
>> * knows, the prefetching is always stop.
>> */
>> - if (err || qspi->fmode == CCR_FMODE_MM)
>> + if (err || err_poll_status || qspi->fmode == CCR_FMODE_MM)
>> goto abort;
>>
>> /* wait end of tx in indirect mode */
>> @@ -406,15 +440,46 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
>> cr, !(cr & CR_ABORT), 1,
>> STM32_ABT_TIMEOUT_US);
>>
>> - writel_relaxed(FCR_CTCF, qspi->io_base + QSPI_FCR);
>> + writel_relaxed(FCR_CTCF | FCR_CSMF, qspi->io_base + QSPI_FCR);
>>
>> - if (err || timeout)
>> - dev_err(qspi->dev, "%s err:%d abort timeout:%d\n",
>> - __func__, err, timeout);
>> + if (err || err_poll_status || timeout)
>> + dev_err(qspi->dev, "%s err:%d err_poll_status:%d abort timeout:%d\n",
>> + __func__, err, err_poll_status, timeout);
>>
>> return err;
>> }
>>
>> +static int stm32_qspi_poll_status(struct spi_mem *mem, const struct spi_mem_op *op,
>> + u16 mask, u16 match,
>> + unsigned long initial_delay_us,
>> + unsigned long polling_rate_us,
>> + unsigned long timeout_ms)
>> +{
>> + struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
>> + int ret;
>> +
>
> Don't you have special constraints on the op that can be passed to a poll
> status request (does it support more than 1 byte of status?)? If not, I
> think you should at least call spi_mem_supports_op().

We don't have any special constraints: the STM32 QSPI supports a status of up to 32 bits.

>
>> + ret = pm_runtime_get_sync(qspi->dev);
>> + if (ret < 0) {
>> + pm_runtime_put_noidle(qspi->dev);
>> + return ret;
>> + }
>> +
>> + mutex_lock(&qspi->lock);
>> +
>> + writel_relaxed(mask, qspi->io_base + QSPI_PSMKR);
>> + writel_relaxed(match, qspi->io_base + QSPI_PSMAR);
>> + qspi->fmode = CCR_FMODE_APM;
>> + qspi->status_timeout = timeout_ms;
>> +
>> + ret = stm32_qspi_send(mem, op);
>> + mutex_unlock(&qspi->lock);
>> +
>> + pm_runtime_mark_last_busy(qspi->dev);
>> + pm_runtime_put_autosuspend(qspi->dev);
>> +
>> + return ret;
>> +}
>> +
>> static int stm32_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
>> {
>> struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
>> @@ -527,7 +592,7 @@ static int stm32_qspi_setup(struct spi_device *spi)
>> flash->presc = presc;
>>
>> mutex_lock(&qspi->lock);
>> - qspi->cr_reg = 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN;
>> + qspi->cr_reg = CR_APMS | 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN;
>> writel_relaxed(qspi->cr_reg, qspi->io_base + QSPI_CR);
>>
>> /* set dcr fsize to max address */
>> @@ -607,6 +672,7 @@ static const struct spi_controller_mem_ops stm32_qspi_mem_ops = {
>> .exec_op = stm32_qspi_exec_op,
>> .dirmap_create = stm32_qspi_dirmap_create,
>> .dirmap_read = stm32_qspi_dirmap_read,
>> + .poll_status = stm32_qspi_poll_status,
>> };
>>
>> static int stm32_qspi_probe(struct platform_device *pdev)
>> @@ -661,6 +727,7 @@ static int stm32_qspi_probe(struct platform_device *pdev)
>> }
>>
>> init_completion(&qspi->data_completion);
>> + init_completion(&qspi->match_completion);
>>
>> qspi->clk = devm_clk_get(dev, NULL);
>> if (IS_ERR(qspi->clk)) {
>