Re: [PATCH v3 0/7] NXP DSPI bugfixes and support for LS1028A

From: Michael Walle
Date: Fri Mar 13 2020 - 12:07:14 EST


Am 2020-03-10 16:22, schrieb Michael Walle:
Hi Vladimir,

Am 2020-03-10 15:56, schrieb Vladimir Oltean:
(2) Also, reading the flash, every second time there is (reproducibly)
an
IO error:

# hexdump -C /dev/mtd0
00000000 68 75 68 75 0a ff ff ff ff ff ff ff ff ff ff ff
|huhu............|
00000010 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
|................|
*
01000000
# hexdump -C /dev/mtd0
00000000 68 75 68 75 0a ff ff ff ff ff ff ff ff ff ff ff
|huhu............|
00000010 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
|................|
*
hexdump: /dev/mtd0: Input/output error
00dc0000
# hexdump -C /dev/mtd0
00000000 68 75 68 75 0a ff ff ff ff ff ff ff ff ff ff ff
|huhu............|
00000010 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
|................|
*
01000000
# hexdump -C /dev/mtd0
00000000 68 75 68 75 0a ff ff ff ff ff ff ff ff ff ff ff
|huhu............|
00000010 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
|................|
*
hexdump: /dev/mtd0: Input/output error
00e6a000


Just to be clear, issue 2 is seen only after you abort another
transaction, right?

No, just normal uninterrupted reading. Just tried it right after
reboot. Doesn't seem to be every second time though, just random
which makes me wonder if that is another problem now. Also the
last successful reading is random.


Ok I guess I know what the root cause is. This is an extract of
the current code:

static int dspi_transfer_one_message(struct spi_controller *ctlr,
struct spi_message *message)
{
..
/* Kick off the interrupt train */
dspi_fifo_write(dspi);

status = wait_event_interruptible(dspi->waitq,
dspi->waitflags);
dspi->waitflags = 0;
..
}

static int dspi_rxtx(struct fsl_dspi *dspi)
{
dspi_fifo_read(dspi);

if (!dspi->len)
/* Success! */
return 0;

dspi_fifo_write(dspi);

return -EINPROGRESS;
}

dspi_rxtx() is used in the ISR. Both dspi_fifo_write() and dspi_rxtx()
access shared data like, dspi->words_in_flight. In the EIO error case
the following bytes_sent is -1, because dspi->words_in_flight is -1.

/* Update total number of bytes that were transferred */
bytes_sent = dspi->words_in_flight * dspi->oper_word_size;

words_in_flight is always -1 after dspi_fifo_read() was called. In
the error case, the ISR kicks in right in the middle of the execution
of dspi_fifo_write() in dspi_transfer_one_message().

static void dspi_fifo_write(struct fsl_dspi *dspi)
{
..
if (dspi->devtype_data->trans_mode == DSPI_EOQ_MODE)
dspi_eoq_fifo_write(dspi);
else
dspi_xspi_fifo_write(dspi);

Now if the ISR is executed right here..


/* Update total number of bytes that were transferred */
bytes_sent = dspi->words_in_flight * dspi->oper_word_size;

.. words_in_flight might be -1.

msg->actual_length += bytes_sent;

and bytes_sent is negative. And this causes an IO error because
the returned overall message length doesn't match.

dspi->progress += bytes_sent / DIV_ROUND_UP(xfer->bits_per_word, 8);
..
}

I could not reproduce the issue with the following patch. I don't
know if I got the locking correct though or if there is a better
way to go.


diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 8b16de9ed382..578fedeb16a0 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -224,6 +224,7 @@ struct fsl_dspi {
u16 tx_cmd;
const struct fsl_dspi_devtype_data *devtype_data;

+ spinlock_t lock;
wait_queue_head_t waitq;
u32 waitflags;

@@ -873,14 +874,20 @@ static void dspi_fifo_write(struct fsl_dspi *dspi)

static int dspi_rxtx(struct fsl_dspi *dspi)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dspi->lock, flags);
dspi_fifo_read(dspi);

- if (!dspi->len)
+ if (!dspi->len) {
/* Success! */
+ spin_unlock_irqrestore(&dspi->lock, flags);
return 0;
+ }

dspi_fifo_write(dspi);

+ spin_unlock_irqrestore(&dspi->lock, flags);
return -EINPROGRESS;
}

@@ -950,7 +957,9 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr,
struct fsl_dspi *dspi = spi_controller_get_devdata(ctlr);
struct spi_device *spi = message->spi;
struct spi_transfer *transfer;
+ unsigned long flags;
int status = 0;
+ int i = 0;

if (dspi->irq)
dspi_enable_interrupts(dspi, true);
@@ -1009,7 +1018,9 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr,
goto out;
} else if (dspi->irq) {
/* Kick off the interrupt train */
+ spin_lock_irqsave(&dspi->lock, flags);
dspi_fifo_write(dspi);
+ spin_unlock_irqrestore(&dspi->lock, flags);

status = wait_event_interruptible(dspi->waitq,
dspi->waitflags);
@@ -1301,6 +1312,7 @@ static int dspi_probe(struct platform_device *pdev)
ctlr->cleanup = dspi_cleanup;
ctlr->slave_abort = dspi_slave_abort;
ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
+ spin_lock_init(&dspi->lock);

pdata = dev_get_platdata(&pdev->dev);
if (pdata) {



-michael