[PATCH v3 4/7] spi: spi-fsl-dspi: Fix bits-per-word acceleration in DMA mode

From: Vladimir Oltean
Date: Tue Mar 10 2020 - 08:56:05 EST


From: Vladimir Oltean <vladimir.oltean@xxxxxxx>

In DMA mode, dspi_setup_accel does not get called, which results in the
dspi->oper_word_size variable (which is used by dspi_dma_xfer) not being
initialized properly.

Because oper_word_size is zero, a few calculations end up being
incorrect, and the DMA transfer eventually times out instead of sending
anything on the wire.

Set up native transfers (or 8-on-16 acceleration) using dspi_setup_accel
for DMA mode too.

Also take the opportunity to simplify the DMA buffer handling a little
bit.

Fixes: 6c1c26ecd9a3 ("spi: spi-fsl-dspi: Accelerate transfers using larger word size if possible")
Signed-off-by: Vladimir Oltean <vladimir.oltean@xxxxxxx>
---
Changes in v3:
Pretty much re-did the patch. Before, dspi_setup_accel was called just
once at the beginning of dspi_dma_xfer. Now it is called in the while
loop. Everything else is just refactoring that follows along.

Changes in v2:
None.

drivers/spi/spi-fsl-dspi.c | 83 +++++++++++++++++++-------------------
1 file changed, 42 insertions(+), 41 deletions(-)

diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index c59b68592283..8f5d18dc78d5 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -119,7 +119,6 @@ struct fsl_dspi_devtype_data {
enum dspi_trans_mode trans_mode;
u8 max_clock_factor;
int fifo_size;
- int dma_bufsize;
};

enum {
@@ -138,7 +137,6 @@ static const struct fsl_dspi_devtype_data devtype_data[] = {
[VF610] = {
.trans_mode = DSPI_DMA_MODE,
.max_clock_factor = 2,
- .dma_bufsize = 4096,
.fifo_size = 4,
},
[LS1021A] = {
@@ -167,19 +165,16 @@ static const struct fsl_dspi_devtype_data devtype_data[] = {
},
[LS2080A] = {
.trans_mode = DSPI_DMA_MODE,
- .dma_bufsize = 8,
.max_clock_factor = 8,
.fifo_size = 4,
},
[LS2085A] = {
.trans_mode = DSPI_DMA_MODE,
- .dma_bufsize = 8,
.max_clock_factor = 8,
.fifo_size = 4,
},
[LX2160A] = {
.trans_mode = DSPI_DMA_MODE,
- .dma_bufsize = 8,
.max_clock_factor = 8,
.fifo_size = 4,
},
@@ -191,9 +186,6 @@ static const struct fsl_dspi_devtype_data devtype_data[] = {
};

struct fsl_dspi_dma {
- /* Length of transfer in words of dspi->fifo_size */
- u32 curr_xfer_len;
-
u32 *tx_dma_buf;
struct dma_chan *chan_tx;
dma_addr_t tx_dma_phys;
@@ -352,7 +344,7 @@ static void dspi_rx_dma_callback(void *arg)
int i;

if (dspi->rx) {
- for (i = 0; i < dma->curr_xfer_len; i++)
+ for (i = 0; i < dspi->words_in_flight; i++)
dspi_push_rx(dspi, dspi->dma->rx_dma_buf[i]);
}

@@ -366,12 +358,12 @@ static int dspi_next_xfer_dma_submit(struct fsl_dspi *dspi)
int time_left;
int i;

- for (i = 0; i < dma->curr_xfer_len; i++)
+ for (i = 0; i < dspi->words_in_flight; i++)
dspi->dma->tx_dma_buf[i] = dspi_pop_tx_pushr(dspi);

dma->tx_desc = dmaengine_prep_slave_single(dma->chan_tx,
dma->tx_dma_phys,
- dma->curr_xfer_len *
+ dspi->words_in_flight *
DMA_SLAVE_BUSWIDTH_4_BYTES,
DMA_MEM_TO_DEV,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
@@ -389,7 +381,7 @@ static int dspi_next_xfer_dma_submit(struct fsl_dspi *dspi)

dma->rx_desc = dmaengine_prep_slave_single(dma->chan_rx,
dma->rx_dma_phys,
- dma->curr_xfer_len *
+ dspi->words_in_flight *
DMA_SLAVE_BUSWIDTH_4_BYTES,
DMA_DEV_TO_MEM,
DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
@@ -437,46 +429,56 @@ static int dspi_next_xfer_dma_submit(struct fsl_dspi *dspi)
return 0;
}

+static void dspi_setup_accel(struct fsl_dspi *dspi);
+
static int dspi_dma_xfer(struct fsl_dspi *dspi)
{
struct spi_message *message = dspi->cur_msg;
struct device *dev = &dspi->pdev->dev;
- struct fsl_dspi_dma *dma = dspi->dma;
- int curr_remaining_bytes;
- int bytes_per_buffer;
+ int bytes_in_flight = dspi->len;
+ int chunk_size;
int ret = 0;

- curr_remaining_bytes = dspi->len;
- bytes_per_buffer = dspi->devtype_data->dma_bufsize /
- dspi->devtype_data->fifo_size;
- while (curr_remaining_bytes) {
+ /*
+ * dspi->len gets decremented by dspi_pop_tx_pushr in
+ * dspi_next_xfer_dma_submit
+ */
+ while (dspi->len) {
+ /* Figure out operational bits-per-word for this chunk */
+ dspi_setup_accel(dspi);
+
+ /*
+ * If the 16-bit TXDATA of the PUSHR is underutilized, then
+ * each DMA buffer will be able to hold only up to fifo_size
+ * useful bytes.
+ */
+ if (dspi->oper_word_size == 1)
+ chunk_size = dspi->devtype_data->fifo_size;
+ else
+ chunk_size = dspi->devtype_data->fifo_size * 2;
+
/* Check if current transfer fits the DMA buffer */
- dma->curr_xfer_len = curr_remaining_bytes /
- dspi->oper_word_size;
- if (dma->curr_xfer_len > bytes_per_buffer)
- dma->curr_xfer_len = bytes_per_buffer;
+ bytes_in_flight = dspi->len;
+ if (bytes_in_flight > chunk_size)
+ bytes_in_flight = chunk_size;
+
+ dspi->words_in_flight = bytes_in_flight / dspi->oper_word_size;

ret = dspi_next_xfer_dma_submit(dspi);
if (ret) {
dev_err(dev, "DMA transfer failed\n");
- goto exit;
-
- } else {
- const int len = dma->curr_xfer_len *
- dspi->oper_word_size;
- curr_remaining_bytes -= len;
- message->actual_length += len;
- if (curr_remaining_bytes < 0)
- curr_remaining_bytes = 0;
+ break;
}
+
+ message->actual_length += bytes_in_flight;
}

-exit:
return ret;
}

static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
{
+ int dma_bufsize = dspi->devtype_data->fifo_size * 2;
struct device *dev = &dspi->pdev->dev;
struct dma_slave_config cfg;
struct fsl_dspi_dma *dma;
@@ -500,14 +502,14 @@ static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
goto err_tx_channel;
}

- dma->tx_dma_buf = dma_alloc_coherent(dev, dspi->devtype_data->dma_bufsize,
+ dma->tx_dma_buf = dma_alloc_coherent(dev, dma_bufsize,
&dma->tx_dma_phys, GFP_KERNEL);
if (!dma->tx_dma_buf) {
ret = -ENOMEM;
goto err_tx_dma_buf;
}

- dma->rx_dma_buf = dma_alloc_coherent(dev, dspi->devtype_data->dma_bufsize,
+ dma->rx_dma_buf = dma_alloc_coherent(dev, dma_bufsize,
&dma->rx_dma_phys, GFP_KERNEL);
if (!dma->rx_dma_buf) {
ret = -ENOMEM;
@@ -544,10 +546,10 @@ static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
return 0;

err_slave_config:
- dma_free_coherent(dev, dspi->devtype_data->dma_bufsize,
+ dma_free_coherent(dev, dma_bufsize,
dma->rx_dma_buf, dma->rx_dma_phys);
err_rx_dma_buf:
- dma_free_coherent(dev, dspi->devtype_data->dma_bufsize,
+ dma_free_coherent(dev, dma_bufsize,
dma->tx_dma_buf, dma->tx_dma_phys);
err_tx_dma_buf:
dma_release_channel(dma->chan_tx);
@@ -562,6 +564,7 @@ static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)

static void dspi_release_dma(struct fsl_dspi *dspi)
{
+ int dma_bufsize = dspi->devtype_data->fifo_size * 2;
struct fsl_dspi_dma *dma = dspi->dma;
struct device *dev = &dspi->pdev->dev;

@@ -570,15 +573,13 @@ static void dspi_release_dma(struct fsl_dspi *dspi)

if (dma->chan_tx) {
dma_unmap_single(dev, dma->tx_dma_phys,
- dspi->devtype_data->dma_bufsize,
- DMA_TO_DEVICE);
+ dma_bufsize, DMA_TO_DEVICE);
dma_release_channel(dma->chan_tx);
}

if (dma->chan_rx) {
dma_unmap_single(dev, dma->rx_dma_phys,
- dspi->devtype_data->dma_bufsize,
- DMA_FROM_DEVICE);
+ dma_bufsize, DMA_FROM_DEVICE);
dma_release_channel(dma->chan_rx);
}
}
--
2.17.1