Re: [PATCH v3 10/13] spi: cadence-quadspi: enable PHY for direct reads and indirect writes
From: Santhosh Kumar K
Date: Mon Jun 01 2026 - 04:45:23 EST
On 28/05/26 14:39, Miquel Raynal wrote:
On 27/05/2026 at 23:25:24 +0530, Santhosh Kumar K <s-k6@xxxxxx> wrote:
After PHY tuning completes, data transfers still use the default
read-capture path. The PHY pipeline must be activated around each
eligible transfer to benefit from the calibrated delay settings.
Add cqspi_phy_enable() to toggle PHY mode. Enabling sets the calibrated
read-capture delay, asserts PHY_EN and PHY_PIPELINE, and decrements the
dummy cycle count by one since the PHY pipeline absorbs that latency.
Disabling reverses all three. Returns cqspi_wait_idle() so callers can
abort if the controller stalls on enable; disable is best-effort.
Split cqspi_direct_read_execute() so PHY-eligible reads run DMA over the
16-byte-aligned middle section with PHY active, while unaligned head and
tail bytes are transferred without PHY. PHY is used when use_phy is set,
the transfer exceeds 16 bytes, and the frequency matches the tuned rate.
cqspi_memcpy_fromio() handles small and non-DMA-able transfers, with
special handling for 8D-8D-8D to ensure 2-byte-aligned I/O accesses.
For indirect writes, PHY is enabled for transfers of at least 1 KB
kiB :-)
where the setup overhead is amortized.
Signed-off-by: Santhosh Kumar K <s-k6@xxxxxx>
---
drivers/spi/spi-cadence-quadspi.c | 181 ++++++++++++++++++++++++++++--
1 file changed, 171 insertions(+), 10 deletions(-)
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index 72208d376305..80e7c572ab80 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -564,6 +564,61 @@ static void cqspi_readdata_capture(struct cqspi_st *cqspi, const bool bypass,
writel(reg, reg_base + CQSPI_REG_READCAPTURE);
}
+static int cqspi_phy_enable(struct cqspi_flash_pdata *f_pdata, bool
enable)
I'm fine with the logic, just the naming is very "TI" specific here. Can
we name the helper "cqspi_tune_phy(f_pdata, enable)"?
[...]
static int cqspi_exec_flash_cmd(struct cqspi_st *cqspi, unsigned int reg)
{
void __iomem *reg_base = cqspi->iobase;
@@ -1191,6 +1246,7 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
void __iomem *reg_base = cqspi->iobase;
unsigned int remaining = n_tx;
unsigned int write_bytes;
+ bool use_phy_write;
int ret;
if (!refcount_read(&cqspi->refcount))
@@ -1226,6 +1282,15 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
if (cqspi->apb_ahb_hazard)
readl(reg_base + CQSPI_REG_INDIRECTWR);
+ /* Use PHY only for large writes where setup overhead is amortized */
+ use_phy_write = n_tx >= SZ_1K && f_pdata->use_phy;
Maybe also "f_pdata->use_tuned_phy?
Yeah, I'll rename them in v4.
+ if (use_phy_write) {
+ ret = cqspi_phy_enable(f_pdata, true);
+ if (ret)
+ goto failwr;
+ }
+
while (remaining > 0) {
size_t write_words, mod_bytes;
@@ -1266,6 +1331,9 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
goto failwr;
}
+ if (use_phy_write)
+ cqspi_phy_enable(f_pdata, false);
+
/* Disable interrupt. */
writel(0, reg_base + CQSPI_REG_IRQMASK);
@@ -1277,6 +1345,9 @@ static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
return 0;
failwr:
+ if (use_phy_write)
+ cqspi_phy_enable(f_pdata, false);
+
/* Disable interrupt. */
writel(0, reg_base + CQSPI_REG_IRQMASK);
@@ -1448,8 +1519,15 @@ static void cqspi_rx_dma_callback(void *param)
complete(&cqspi->rx_dma_complete);
}
-static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata,
- u_char *buf, loff_t from, size_t len)
+static bool cqspi_use_phy(struct cqspi_flash_pdata *f_pdata,
+ const struct spi_mem_op *op)
+{
+ return f_pdata->use_phy && op->data.nbytes > 16 &&
Why is the check looking for 16 here, and 1kiB above?
Direct reads have very little per-op overhead, so enabling PHY is
beneficial even for relatively small transfers. (> 16)
Indirect writes, on the other hand, incur significantly higher setup
cost, resulting in much larger break point. (> 1kiB)
+ op->max_freq == f_pdata->max_clk_rate;
+}
+
+static int cqspi_direct_read_dma(struct cqspi_flash_pdata *f_pdata, u_char *buf,
+ loff_t from, size_t len)
{
struct cqspi_st *cqspi = f_pdata->cqspi;
struct device *dev = &cqspi->pdev->dev;
@@ -1461,19 +1539,14 @@ static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata,
dma_addr_t dma_dst;
struct device *ddev;
- if (!cqspi->rx_chan || !virt_addr_valid(buf)) {
- memcpy_fromio(buf, cqspi->ahb_base + from, len);
- return 0;
- }
This (and changes below) don't seem to be directly related to the PHY
addition, could we have those changes done in a separated patch, before
introducing PHY tuning use?
-
ddev = cqspi->rx_chan->device->dev;
dma_dst = dma_map_single(ddev, buf, len, DMA_FROM_DEVICE);
if (dma_mapping_error(ddev, dma_dst)) {
dev_err(dev, "dma mapping failed\n");
return -ENOMEM;
}
- tx = dmaengine_prep_dma_memcpy(cqspi->rx_chan, dma_dst, dma_src,
- len, flags);
+ tx = dmaengine_prep_dma_memcpy(cqspi->rx_chan, dma_dst, dma_src, len,
+ flags);
Not related to the change, isn't it?
Yeah, not related I'll leave this untouched. However, the changes above
and below are related and belong together in the same patch.
if (!tx) {
dev_err(dev, "device_prep_dma_memcpy error\n");
ret = -EIO;
@@ -1507,6 +1580,94 @@ static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata,
return ret;
}
[...]
static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata,
const struct spi_mem_op *op)
{
@@ -1524,7 +1685,7 @@ static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata,
if ((cqspi->use_direct_mode && ((from + len) <= cqspi->ahb_size)) ||
(cqspi->ddata && cqspi->ddata->quirks & CQSPI_NO_INDIRECT_MODE))
- return cqspi_direct_read_execute(f_pdata, buf, from, len);
+ return cqspi_direct_read_execute(f_pdata, op);
This change could also be done in a different commit.
Thanks,
Santhosh.
if (cqspi->use_dma_read && ddata && ddata->indirect_read_dma &&
virt_addr_valid(buf) && ((dma_align & CQSPI_DMA_UNALIGN) == 0))
Thanks,
Miquèl