[PATCH v4 2/3] spi: tegra210-quad: Cache TRANS_STATUS in ISR for timeout handler

From: Vishwaroop A

Date: Wed Jun 10 2026 - 02:27:20 EST

On heavily loaded systems the workqueue bottom half can be delayed
long enough for wait_for_completion_timeout() to expire before the
ISR's queued work actually runs. Reading QSPI_TRANS_STATUS directly
from the controller in the timeout handler races with both the
workqueue handler and the controller itself, and can mis-classify a
transfer that genuinely timed out as having "completed".

Cache the controller status captured by the hard IRQ before it is
acked, and let the timeout handler consume that cache:

- tegra_qspi_isr() reads QSPI_FIFO_STATUS and QSPI_TRANS_STATUS,
derives tx_status / rx_status, then publishes them via WRITE_ONCE()
and the trans_status cache via smp_store_release() before masking
and acking the controller IRQ.

- tegra_qspi_handle_timeout() consumes trans_status with a paired
smp_load_acquire(). The release/acquire pair guarantees that a
timeout handler which observes a non-zero trans_status also
observes the matching status_reg / tx_status / rx_status updates.

- tegra_qspi_setup_transfer_one() clears the cache with
smp_store_release() under the spinlock before unmasking the IRQ
for the new transfer, so a stale RDY bit from the previous
transfer cannot fool the timeout handler.

- If the cache is zero at timeout time the ISR genuinely never ran
(lost IRQ), so the handler falls back to a direct controller read
so we do not unconditionally surface a timeout when the hardware
has actually finished.

curr_xfer is re-checked under the spinlock so that a workqueue handler
which races with the timeout path cannot double-complete the transfer.

Signed-off-by: Vishwaroop A <va@xxxxxxxxxx>
---
drivers/spi/spi-tegra210-quad.c | 65 ++++++++++++++++++++++++++-------
1 file changed, 51 insertions(+), 14 deletions(-)

diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index dd5d40e0dcbc..f0b15d13e433 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -214,6 +214,7 @@ struct tegra_qspi {
u32 tx_status;
u32 rx_status;
u32 status_reg;
+ u32 trans_status;
bool is_packed;
bool use_dma;

@@ -861,6 +862,13 @@ static u32 tegra_qspi_setup_transfer_one(struct spi_device *spi, struct spi_tran
tqspi->cur_rx_pos = 0;
tqspi->cur_tx_pos = 0;
tqspi->curr_xfer = t;
+ /*
+ * Pairs with smp_load_acquire() in tegra_qspi_handle_timeout().
+ * Clearing the cached trans_status before unmasking the IRQ for
+ * the new transfer prevents a stale RDY bit from the previous
+ * transfer fooling the timeout handler into a false recovery.
+ */
+ smp_store_release(&tqspi->trans_status, 0);
spin_unlock_irqrestore(&tqspi->lock, flags);

if (is_first_of_msg) {
@@ -1075,26 +1083,43 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_qspi *tqspi);
*/
static int tegra_qspi_handle_timeout(struct tegra_qspi *tqspi)
{
+ unsigned long flags;
irqreturn_t ret;
u32 status;

- /* Check if hardware actually completed the transfer */
- status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS);
+ /*
+ * Pairs with smp_store_release() in tegra_qspi_isr(). If the ISR
+ * managed to run before wait_for_completion_timeout() expired, the
+ * cached value lets us classify the timeout without racing with a
+ * concurrent HW write to QSPI_TRANS_STATUS.
+ *
+ * A zero cache means the ISR never ran for this transfer (the IRQ
+ * was lost entirely). In that case fall back to reading the
+ * controller directly so we do not unconditionally surface a
+ * timeout when the hardware has actually finished.
+ */
+ status = smp_load_acquire(&tqspi->trans_status);
+ if (!status)
+ status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS);
+
if (!(status & QSPI_RDY))
return -ETIMEDOUT;

+ spin_lock_irqsave(&tqspi->lock, flags);
/*
- * Hardware completed but interrupt was lost/delayed. Manually
- * process the completion by calling the appropriate handler.
+ * ISR or workqueue may have already completed the transfer
+ * and cleared curr_xfer between the completion timeout and now.
*/
+ if (!tqspi->curr_xfer) {
+ spin_unlock_irqrestore(&tqspi->lock, flags);
+ return 0;
+ }
+
+ spin_unlock_irqrestore(&tqspi->lock, flags);
+
dev_warn_ratelimited(tqspi->dev,
"QSPI interrupt timeout, but transfer complete\n");

- /* Clear the transfer status */
- status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS);
- tegra_qspi_writel(tqspi, status, QSPI_TRANS_STATUS);
-
- /* Manually trigger completion handler */
if (!tqspi->is_curr_dma_xfer)
ret = handle_cpu_based_xfer(tqspi);
else
@@ -1658,6 +1683,7 @@ static void tegra_qspi_work_handler(struct work_struct *work)
static irqreturn_t tegra_qspi_isr(int irq, void *context_data)
{
struct tegra_qspi *tqspi = context_data;
+ u32 status_reg, trans_status;

if (!READ_ONCE(tqspi->curr_xfer)) {
tegra_qspi_mask_clear_irq(tqspi);
@@ -1665,16 +1691,27 @@ static irqreturn_t tegra_qspi_isr(int irq, void *context_data)
}

spin_lock(&tqspi->lock);
- tqspi->status_reg = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS);
+ status_reg = tegra_qspi_readl(tqspi, QSPI_FIFO_STATUS);
+ trans_status = tegra_qspi_readl(tqspi, QSPI_TRANS_STATUS);
tegra_qspi_mask_clear_irq(tqspi);

if (tqspi->cur_direction & DATA_DIR_TX)
- tqspi->tx_status = tqspi->status_reg &
- (QSPI_TX_FIFO_UNF | QSPI_TX_FIFO_OVF);
+ WRITE_ONCE(tqspi->tx_status,
+ status_reg & (QSPI_TX_FIFO_UNF | QSPI_TX_FIFO_OVF));

if (tqspi->cur_direction & DATA_DIR_RX)
- tqspi->rx_status = tqspi->status_reg &
- (QSPI_RX_FIFO_OVF | QSPI_RX_FIFO_UNF);
+ WRITE_ONCE(tqspi->rx_status,
+ status_reg & (QSPI_RX_FIFO_OVF | QSPI_RX_FIFO_UNF));
+
+ WRITE_ONCE(tqspi->status_reg, status_reg);
+ /*
+ * smp_store_release() pairs with smp_load_acquire() in
+ * tegra_qspi_handle_timeout(). Publishing trans_status last with
+ * release semantics guarantees that a timeout handler which sees
+ * a non-zero trans_status also observes the WRITE_ONCE() updates
+ * to status_reg / tx_status / rx_status above.
+ */
+ smp_store_release(&tqspi->trans_status, trans_status);

spin_unlock(&tqspi->lock);

--
2.17.1