Re: [PATCH v4 5/5] spi: spi-qcom-qspi: Add DMA mode support

From: Doug Anderson
Date: Thu Apr 20 2023 - 13:20:12 EST


Hi,

On Thu, Apr 20, 2023 at 6:13 AM Vijaya Krishna Nivarthi
<quic_vnivarth@xxxxxxxxxxx> wrote:
>
> @@ -137,11 +155,29 @@ enum qspi_clocks {
> QSPI_NUM_CLKS
> };
>
> +enum qspi_xfer_mode {
> + QSPI_FIFO,
> + QSPI_DMA
> +};
> +
> +/*
> + * Number of entries in sgt returned from spi framework that-
> + * will be supported. Can be modified as required.
> + * In practice, given max_dma_len is 64KB, the number of
> + * entries is not expected to exceed 1.
> + */
> +#define QSPI_MAX_SG 5

I actually wonder if this would be more nicely done just using a
linked list, which naturally mirrors how SGs work anyway. You'd add
"struct list_head" to the end of "struct qspi_cmd_desc" and just store
a pointer to the head in "struct qcom_qspi".

For freeing, you can always get back the "virtual" address because
it's just the address of each node. You can always get back the
physical address because it's stored in "data_address".


> @@ -223,6 +261,16 @@ static void qcom_qspi_handle_err(struct spi_master *master,
> spin_lock_irqsave(&ctrl->lock, flags);
> writel(0, ctrl->base + MSTR_INT_EN);

Can you also clear all interrupts here? That will make sure that if
the interrupt somehow fires after you run that it will detect that
there's nothing to do.


> ctrl->xfer.rem_bytes = 0;
> +
> + if (ctrl->xfer_mode == QSPI_DMA) {
> + int i;
> +
> + /* free cmd descriptors */
> + for (i = 0; i < ctrl->n_cmd_desc; i++)
> + dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i],
> + ctrl->dma_cmd_desc[i]);
> + ctrl->n_cmd_desc = 0;
> + }

Instead of checking for ctrl->xfer_mode, why not just check for
ctrl->n_cmd_desc? Then you can get rid of "ctrl->xfer_mode".


> @@ -258,6 +306,120 @@ static int qcom_qspi_set_speed(struct qcom_qspi *ctrl, unsigned long speed_hz)
> return 0;
> }
>
> +#define QSPI_ALIGN_REQ 32

nit: put this at the top of the file with other #defines.


> +static int qcom_qspi_alloc_desc(struct qcom_qspi *ctrl, dma_addr_t dma_ptr,
> + uint32_t n_bytes)
> +{
> + struct qspi_cmd_desc *virt_cmd_desc, *prev;
> + dma_addr_t dma_cmd_desc;
> +
> + /* allocate for dma cmd descriptor */
> + virt_cmd_desc = (struct qspi_cmd_desc *)dma_pool_alloc(ctrl->dma_cmd_pool,
> + GFP_KERNEL, &dma_cmd_desc);

Remove unnecessary cast; "void *" assigns fine w/out a cast.

Add "| GFP_ZERO" and then get rid of the need to clear the "reserved"
and "next_descriptor" stuff below.


> + if (!virt_cmd_desc) {
> + dev_err(ctrl->dev,
> + "Could not allocate for cmd_desc\n");
> + return -ENOMEM;
> + }

You never need to add an extra message for allocation failures (they
already splat). Remove it.


> + ctrl->virt_cmd_desc[ctrl->n_cmd_desc] = virt_cmd_desc;
> + ctrl->dma_cmd_desc[ctrl->n_cmd_desc] = dma_cmd_desc;
> + ctrl->n_cmd_desc++;
> +
> + /* setup cmd descriptor */
> + virt_cmd_desc->data_address = dma_ptr;
> + virt_cmd_desc->next_descriptor = 0;
> + virt_cmd_desc->direction = ctrl->xfer.dir;
> + virt_cmd_desc->multi_io_mode = qspi_buswidth_to_iomode(ctrl, ctrl->xfer.buswidth);
> + virt_cmd_desc->reserved1 = 0;
> + virt_cmd_desc->fragment = ctrl->xfer.is_last ? 0 : 1;

virt_cmd_desc->fragment = !ctrl->xfer.is_last;


> + virt_cmd_desc->reserved2 = 0;
> + virt_cmd_desc->length = n_bytes;
> +
> + /* update previous descriptor */
> + if (ctrl->n_cmd_desc >= 2) {
> + prev = (ctrl->virt_cmd_desc)[ctrl->n_cmd_desc - 2];
> + prev->next_descriptor = dma_cmd_desc;
> + prev->fragment = 1;
> + }
> +
> + return 0;
> +}
> +
> +static int qcom_qspi_setup_dma_desc(struct qcom_qspi *ctrl,
> + struct spi_transfer *xfer)
> +{
> + int ret;
> + struct sg_table *sgt;
> + unsigned int sg_total_len = 0;
> + dma_addr_t dma_ptr_sg;
> + unsigned int dma_len_sg;
> + int i;
> +
> + if (ctrl->n_cmd_desc) {
> + dev_err(ctrl->dev, "Remnant dma buffers n_cmd_desc-%d\n", ctrl->n_cmd_desc);
> + return -EIO;
> + }
> +
> + sgt = (ctrl->xfer.dir == QSPI_READ) ? &xfer->rx_sg : &xfer->tx_sg;
> + if (!sgt->nents || sgt->nents > QSPI_MAX_SG) {
> + dev_err(ctrl->dev, "Cannot handle %d entries in scatter list\n", sgt->nents);
> + return -EAGAIN;

If you're retrying, don't use "dev_err" but instead "dev_warn".
Similar in other places.


> + }
> +
> + for (i = 0; i < sgt->nents; i++) {
> + dma_ptr_sg = sg_dma_address(sgt->sgl + i);
> + if (!IS_ALIGNED(dma_ptr_sg, QSPI_ALIGN_REQ)) {
> + dev_err(ctrl->dev, "dma address-%pad not aligned to %d\n",
> + &dma_ptr_sg, QSPI_ALIGN_REQ);

In general it's not good practice to put pointer values into the error
log as it can be an attack vector. Probably the %p will be replaced
with something bogus anyway, but it'll look weird.


> + return -EAGAIN;
> + }
> + sg_total_len += sg_dma_len(sgt->sgl + i);
> + }
> +
> + if (sg_total_len != xfer->len) {
> + dev_err(ctrl->dev, "Data lengths mismatch\n");
> + return -EAGAIN;
> + }

This feels like overly defensive programming. The SPI framework is
what's in charge of setting up the scatter gather lists and it can be
trusted to give you something where the total transfer length matches.
IMO, drop that validation. I'm OK w/ keeping the double-check of the
alignment since that's handled by the client drivers and they are less
trustworthy.


> +
> + for (i = 0; i < sgt->nents; i++) {
> + dma_ptr_sg = sg_dma_address(sgt->sgl + i);
> + dma_len_sg = sg_dma_len(sgt->sgl + i);
> +
> + ret = qcom_qspi_alloc_desc(ctrl, dma_ptr_sg, dma_len_sg);
> + if (ret)
> + goto cleanup;
> + }
> + return 0;
> +
> +cleanup:
> + dev_err(ctrl->dev, "ERROR cleanup in setup_dma_desc\n");

Drop above print--we should have already printed any relevant errors.


> @@ -290,8 +454,37 @@ static int qcom_qspi_transfer_one(struct spi_master *master,
> ctrl->xfer.is_last = list_is_last(&xfer->transfer_list,
> &master->cur_msg->transfers);
> ctrl->xfer.rem_bytes = xfer->len;
> +
> + if (xfer->rx_sg.nents || xfer->tx_sg.nents) {
> + /* do DMA transfer */
> + ctrl->xfer_mode = QSPI_DMA;
> + if (!(mstr_cfg & DMA_ENABLE)) {
> + mstr_cfg |= DMA_ENABLE;
> + writel(mstr_cfg, ctrl->base + MSTR_CONFIG);
> + }
> +
> + ret = qcom_qspi_setup_dma_desc(ctrl, xfer);
> + if (ret) {
> + if (ret == -EAGAIN) {
> + dev_err_once(ctrl->dev, "DMA failure, falling back to PIO");
> + goto do_pio;
> + }
> + spin_unlock_irqrestore(&ctrl->lock, flags);
> + return ret;
> + }
> + qcom_qspi_dma_xfer(ctrl);
> + goto end;
> + }
> +
> +do_pio:

A bit nitty, but the "do_pio" label feels like a slight stretch from
what I consider the acceptable uses of "goto". Maybe it's better to
avoid it? The "end" label is OK w/ me, though usually I see it called
"exit". AKA:

...
ret = qcom_qspi_setup_dma_desc(ctrl, xfer);
if (ret != -EAGAIN) {
if (!ret)
qcom_qspi_dma_xfer(ctrl);
goto exit;
}
dev_warn_once(...);
ret = 0; /* We'll retry w/ PIO */
}

...
...

exit:
spin_unlock_irqrestore(&ctrl->lock, flags);

if (ret)
return ret;

/* We'll call spi_finalize_current_transfer() when done */
return 1;


> @@ -328,6 +521,17 @@ static int qcom_qspi_prepare_message(struct spi_master *master,
> return 0;
> }
>
> +static int qcom_qspi_alloc_dma(struct qcom_qspi *ctrl)
> +{
> + /* allocate for cmd descriptors pool */

The above comment doesn't add much. Drop?


> @@ -426,27 +630,48 @@ static irqreturn_t qcom_qspi_irq(int irq, void *dev_id)
> int_status = readl(ctrl->base + MSTR_INT_STATUS);
> writel(int_status, ctrl->base + MSTR_INT_STATUS);
>
> - if (ctrl->xfer.dir == QSPI_WRITE) {
> - if (int_status & WR_FIFO_EMPTY)
> - ret = pio_write(ctrl);
> - } else {
> - if (int_status & RESP_FIFO_RDY)
> - ret = pio_read(ctrl);
> - }
> -
> - if (int_status & QSPI_ERR_IRQS) {
> - if (int_status & RESP_FIFO_UNDERRUN)
> - dev_err(ctrl->dev, "IRQ error: FIFO underrun\n");
> - if (int_status & WR_FIFO_OVERRUN)
> - dev_err(ctrl->dev, "IRQ error: FIFO overrun\n");
> - if (int_status & HRESP_FROM_NOC_ERR)
> - dev_err(ctrl->dev, "IRQ error: NOC response error\n");
> - ret = IRQ_HANDLED;
> - }
> -
> - if (!ctrl->xfer.rem_bytes) {
> - writel(0, ctrl->base + MSTR_INT_EN);
> - spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
> + switch (ctrl->xfer_mode) {
> + case QSPI_FIFO:
> + if (ctrl->xfer.dir == QSPI_WRITE) {
> + if (int_status & WR_FIFO_EMPTY)
> + ret = pio_write(ctrl);
> + } else {
> + if (int_status & RESP_FIFO_RDY)
> + ret = pio_read(ctrl);
> + }
> +
> + if (int_status & QSPI_ERR_IRQS) {
> + if (int_status & RESP_FIFO_UNDERRUN)
> + dev_err(ctrl->dev, "IRQ error: FIFO underrun\n");
> + if (int_status & WR_FIFO_OVERRUN)
> + dev_err(ctrl->dev, "IRQ error: FIFO overrun\n");
> + if (int_status & HRESP_FROM_NOC_ERR)
> + dev_err(ctrl->dev, "IRQ error: NOC response error\n");
> + ret = IRQ_HANDLED;
> + }
> +
> + if (!ctrl->xfer.rem_bytes) {
> + writel(0, ctrl->base + MSTR_INT_EN);
> + spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
> + }
> + break;
> + case QSPI_DMA:
> + if (int_status & DMA_CHAIN_DONE) {
> + int i;
> +
> + writel(0, ctrl->base + MSTR_INT_EN);
> +
> + for (i = 0; i < ctrl->n_cmd_desc; i++)
> + dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i],
> + ctrl->dma_cmd_desc[i]);
> + ctrl->n_cmd_desc = 0;
> +
> + ret = IRQ_HANDLED;
> + spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
> + }
> + break;
> + default:
> + dev_err(ctrl->dev, "Unknown xfer mode:%d", ctrl->xfer_mode);

I'm still of the opinion that you should drop xfer_mode, which means
deleting it from above.


> @@ -517,7 +742,14 @@ static int qcom_qspi_probe(struct platform_device *pdev)
> return ret;
> }
>
> + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
> + if (ret)
> + return dev_err_probe(dev, ret, "could not set DMA mask\n");
> +
> master->max_speed_hz = 300000000;
> + master->max_dma_len = 65536; /* as per HPG */
> + /* intimate protocal drivers about alignment requirement */

Comment above doesn't add much and is already in the comment in the
definition of the structure. Drop it.


> + master->dma_alignment = QSPI_ALIGN_REQ;
> master->num_chipselect = QSPI_NUM_CS;
> master->bus_num = -1;
> master->dev.of_node = pdev->dev.of_node;
> @@ -528,6 +760,7 @@ static int qcom_qspi_probe(struct platform_device *pdev)
> master->prepare_message = qcom_qspi_prepare_message;
> master->transfer_one = qcom_qspi_transfer_one;
> master->handle_err = qcom_qspi_handle_err;
> + master->can_dma = qcom_qspi_can_dma;
> master->auto_runtime_pm = true;
>
> ret = devm_pm_opp_set_clkname(&pdev->dev, "core");
> @@ -540,6 +773,11 @@ static int qcom_qspi_probe(struct platform_device *pdev)
> return ret;
> }
>
> + /* allocate for DMA descriptor pools */

Above comment is obvious from the name of the function you're calling.