Re: [PATCH v4 2/3] dmaengine: imx-sdma: fix dma freezes

From: Jan Lübbe
Date: Thu Sep 19 2019 - 11:19:45 EST


Hi Philipp,

see below...

On Thu, 2019-09-19 at 16:29 +0200, Philipp Puschmann wrote:
> For some years and since many kernel versions there are reports that the
> RX UART SDMA channel stops working at some point. The workaround was to
> disable DMA for RX. This commit tries to fix the problem itself.
>
> Due to its license i wasn't able to debug the sdma script itself but it
> somehow leads to blocking the scheduling of the channel script when a
> running sdma script does not find any free descriptor in the ring to put
> its data into.
>
> If we detect such a potential case we manually restart the channel.
>
> As sdmac->desc is constant we can move desc out of the loop.
>
> Fixes: 1ec1e82f2510 ("dmaengine: Add Freescale i.MX SDMA support")
> Signed-off-by: Philipp Puschmann <philipp.puschmann@xxxxxxxxx>
> Reviewed-by: Lucas Stach <l.stach@xxxxxxxxxxxxxx>
> ---
>
> Changelog v4:
> - fixed the fixes tag
>
> Changelog v3:
> - use correct dma_wmb() instead of dma_wb()
> - add fixes tag
>
> Changelog v2:
> - clarify comment and commit description
>
> drivers/dma/imx-sdma.c | 21 +++++++++++++++++----
> 1 file changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
> index e029a2443cfc..a32b5962630e 100644
> --- a/drivers/dma/imx-sdma.c
> +++ b/drivers/dma/imx-sdma.c
> @@ -775,21 +775,23 @@ static void sdma_start_desc(struct sdma_channel *sdmac)
> static void sdma_update_channel_loop(struct sdma_channel *sdmac)
> {
> struct sdma_buffer_descriptor *bd;
> - int error = 0;
> - enum dma_status old_status = sdmac->status;
> + struct sdma_desc *desc = sdmac->desc;
> + int error = 0, cnt = 0;
> + enum dma_status old_status = sdmac->status;
>
> /*
> * loop mode. Iterate over descriptors, re-setup them and
> * call callback function.
> */
> - while (sdmac->desc) {
> - struct sdma_desc *desc = sdmac->desc;
> + while (desc) {
>
> bd = &desc->bd[desc->buf_tail];
>
> if (bd->mode.status & BD_DONE)
> break;
>
> + cnt++;
> +
> if (bd->mode.status & BD_RROR) {
> bd->mode.status &= ~BD_RROR;
> sdmac->status = DMA_ERROR;
> @@ -822,6 +824,17 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac)
> if (error)
> sdmac->status = old_status;
> }
> +
> + /* In some situations it may happen that the sdma does not found any
^ hasn't
> + * usable descriptor in the ring to put data into. The channel is
> + * stopped then. While there is no specific error condition we can
> + * check for, a necessary condition is that all available buffers for
> + * the current channel have been written to by the sdma script. In
> + * this case and after we have made the buffers available again,
> + * we restart the channel.
> + */

Are you sure we can't miss cases where we only had to make some buffers
available again, but the SDMA already ran out of buffers before?

A while ago, I was debugging a similar issue triggered by receiving
data with a wrong baud rate, which leads to all descriptors being
marked with the error flag very quickly (and the SDMA stalling).
I noticed that you can check if the channel is still running by
checking the SDMA_H_STATSTOP register & BIT(sdmac->channel).

I also added a flag for the sdmac->flags field to allow stopping the
channel from the callback (otherwise it would enable the channel
again).

Attached is my current version of that patch for reference.

> + if (cnt >= desc->num_bd)
> + sdma_enable_channel(sdmac->sdma, sdmac->channel);
> }
>
> static void mxc_sdma_handle_channel_normal(struct sdma_channel *data)
From 73d7dcf84dac5512c50448ff6adf084f1a9bd6f9 Mon Sep 17 00:00:00 2001
From: Jan Luebbe <jlu@xxxxxxxxxxxxxx>
Date: Tue, 16 Apr 2019 18:35:04 +0200
Subject: [PATCH] dmaengine: imx-sdma: restart stopped cyclic transfers

For cyclic DMA transfers, we have at least two cases where we can run
out descriptors available to the engine:
- Interrups are disabled for too long and all buffers a filled with
data.
- DMA errors (such as generated by baud rate mismatch with imx-uart) use
up all descriptors before we can react.

In this case, SDMA stops the channel and no further transfers are done
until the respective channel is disabled and re-enabled.

The best we can do in this case is to check if the transfer should still
be enabled (it could have been disabled during
sdma_update_channel_loop), but the SDMA channel is stopped. In this
case, we re-start the channel.

To avoid racing with changes to the sdmac->status field (which is
written and restored in sdma_update_channel_loop), we add a new flag
(IMX_DMA_ACTIVE) to indicate that the channel is currently active.

Signed-off-by: Jan Luebbe <jlu@xxxxxxxxxxxxxx>
---
drivers/dma/imx-sdma.c | 13 +++++++++++++
1 file changed, 13 insertions(+)

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 58fa8520892b..8774259af24c 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -383,6 +383,7 @@ struct sdma_channel {
};

#define IMX_DMA_SG_LOOP BIT(0)
+#define IMX_DMA_ACTIVE BIT(1)

#define MAX_DMA_CHANNELS 32
#define MXC_SDMA_DEFAULT_PRIORITY 1
@@ -658,6 +659,9 @@ static int sdma_config_ownership(struct sdma_channel *sdmac,

static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
{
+ struct sdma_channel *sdmac = &sdma->channel[channel];
+
+ sdmac->flags |= IMX_DMA_ACTIVE;
writel(BIT(channel), sdma->regs + SDMA_H_START);
}

@@ -774,6 +778,7 @@ static void sdma_start_desc(struct sdma_channel *sdmac)

static void sdma_update_channel_loop(struct sdma_channel *sdmac)
{
+ struct sdma_engine *sdma = sdmac->sdma;
struct sdma_buffer_descriptor *bd;
int error = 0;
enum dma_status old_status = sdmac->status;
@@ -820,6 +825,13 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac)
if (error)
sdmac->status = old_status;
}
+
+ if ((sdmac->flags & IMX_DMA_ACTIVE) &&
+ !(readl_relaxed(sdma->regs + SDMA_H_STATSTOP) & BIT(sdmac->channel))) {
+ dev_err_ratelimited(sdma->dev, "SDMA channel %d: cyclic transfer disabled by HW, reenabling\n",
+ sdmac->channel);
+ writel(BIT(sdmac->channel), sdma->regs + SDMA_H_START);
+ };
}

static void mxc_sdma_handle_channel_normal(struct sdma_channel *data)
@@ -1049,6 +1061,7 @@ static int sdma_disable_channel(struct dma_chan *chan)
struct sdma_engine *sdma = sdmac->sdma;
int channel = sdmac->channel;

+ sdmac->flags &= ~IMX_DMA_ACTIVE;
writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP);
sdmac->status = DMA_ERROR;

--
2.23.0