[PATCH v2] spi: cadence-xspi: Support 32bit and 64bit slave dma interface

From: Jisheng Zhang

Date: Sat May 30 2026 - 08:56:35 EST


The Cadence xSPI controller's slave DMA interface may support a wider
data width, and wider I/O accesses improve performance. The supported
width can be determined from the DMA_DATA_WIDTH bit of CTRL_FEATURES_REG:
0 means 32-bit, 1 means 64-bit.

A simple test with QSPI nor flash on one arm64 platform:

Use 8bit slave dma data width (now):
# dd if=/dev/mtdblock0 of=/dev/null bs=8192 count=1000
1000+0 records in
1000+0 records out
8192000 bytes (7.8MB) copied, 1.368735 seconds, 5.7MB/s

Use 32bit slave dma data width:
# dd if=/dev/mtdblock0 of=/dev/null bs=8192 count=1000
1000+0 records in
1000+0 records out
8192000 bytes (7.8MB) copied, 1.088787 seconds, 7.2MB/s

Improved by 26.3%!

Use 64bit slave dma data width:
# dd if=/dev/mtdblock0 of=/dev/null bs=8192 count=1000
1000+0 records in
1000+0 records out
8192000 bytes (7.8MB) copied, 0.831104 seconds, 9.4MB/s

Improved by 64.9%!

Signed-off-by: Jisheng Zhang <jszhang@xxxxxxxxxx>
---

Since v1:
- The hardware capability (slave DMA data width) can be detected by
checking DMA_DATA_WIDTH in CTRL_FEATURES_REG, so a DT property is no
longer needed.

drivers/spi/spi-cadence-xspi.c | 53 +++++++++++++++++++++++++++++++---
1 file changed, 49 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-cadence-xspi.c b/drivers/spi/spi-cadence-xspi.c
index 895b4b3276a5..ab6f1c68a2eb 100644
--- a/drivers/spi/spi-cadence-xspi.c
+++ b/drivers/spi/spi-cadence-xspi.c
@@ -369,6 +369,8 @@ struct cdns_xspi_dev {

void *in_buffer;
const void *out_buffer;
+ /* Slave DMA data width in bytes (4 or 8). */
+ u8 dma_data_width;

u8 hw_num_banks;

@@ -573,11 +575,56 @@ static int cdns_xspi_controller_init(struct cdns_xspi_dev *cdns_xspi)

ctrl_features = readl(cdns_xspi->iobase + CDNS_XSPI_CTRL_FEATURES_REG);
cdns_xspi->hw_num_banks = FIELD_GET(CDNS_XSPI_NUM_BANKS, ctrl_features);
+ cdns_xspi->dma_data_width = (ctrl_features & CDNS_XSPI_DMA_DATA_WIDTH) ? 8 : 4;
cdns_xspi->set_interrupts_handler(cdns_xspi, false);

return 0;
}

+static inline void cdns_xspi_sdma_read(struct cdns_xspi_dev *cdns_xspi, size_t len)
+{ /* Copy len bytes from the slave DMA port (sdmabase) into in_buffer. */
+ void __iomem *src = cdns_xspi->sdmabase;
+ void *buf = cdns_xspi->in_buffer;
+ size_t offset = 0; /* bytes already copied by the wide accessor */
+
+ if (cdns_xspi->dma_data_width == 4) { /* 32-bit wide port */
+ if (IS_ALIGNED((uintptr_t)src, 4) && IS_ALIGNED((uintptr_t)buf, 4)) {
+ ioread32_rep(src, buf, len >> 2); /* len / 4 whole words */
+ offset = len & ~0x3; /* bytes covered by the whole words */
+ len -= offset; /* 0..3 trailing bytes remain */
+ }
+ } else { /* otherwise the width is 8 bytes (64-bit port) */
+ if (IS_ALIGNED((uintptr_t)src, 8) && IS_ALIGNED((uintptr_t)buf, 8)) {
+ ioread64_rep(src, buf, len >> 3); /* len / 8 whole words */
+ offset = len & ~0x7; /* bytes covered by the whole words */
+ len -= offset; /* 0..7 trailing bytes remain */
+ }
+ }
+ ioread8_rep(src, (u8 *)buf + offset, len); /* tail, or all of it if unaligned */
+}
+
+static inline void cdns_xspi_sdma_write(struct cdns_xspi_dev *cdns_xspi, size_t len)
+{ /* Copy len bytes from out_buffer to the slave DMA port (sdmabase). */
+ void __iomem *dst = cdns_xspi->sdmabase;
+ const void *buf = cdns_xspi->out_buffer;
+ size_t offset = 0; /* bytes already written by the wide accessor */
+
+ if (cdns_xspi->dma_data_width == 4) { /* 32-bit wide port */
+ if (IS_ALIGNED((uintptr_t)dst, 4) && IS_ALIGNED((uintptr_t)buf, 4)) {
+ iowrite32_rep(dst, buf, len >> 2); /* len / 4 whole words */
+ offset = len & ~0x3; /* bytes covered by the whole words */
+ len -= offset; /* 0..3 trailing bytes remain */
+ }
+ } else { /* otherwise the width is 8 bytes (64-bit port) */
+ if (IS_ALIGNED((uintptr_t)dst, 8) && IS_ALIGNED((uintptr_t)buf, 8)) {
+ iowrite64_rep(dst, buf, len >> 3); /* len / 8 whole words */
+ offset = len & ~0x7; /* bytes covered by the whole words */
+ len -= offset; /* 0..7 trailing bytes remain */
+ }
+ }
+ iowrite8_rep(dst, (const u8 *)buf + offset, len); /* tail, or all of it if unaligned */
+}
+
static void cdns_xspi_sdma_handle(struct cdns_xspi_dev *cdns_xspi)
{
u32 sdma_size, sdma_trd_info;
@@ -589,13 +636,11 @@ static void cdns_xspi_sdma_handle(struct cdns_xspi_dev *cdns_xspi)

switch (sdma_dir) {
case CDNS_XSPI_SDMA_DIR_READ:
- ioread8_rep(cdns_xspi->sdmabase,
- cdns_xspi->in_buffer, sdma_size);
+ cdns_xspi_sdma_read(cdns_xspi, sdma_size);
break;

case CDNS_XSPI_SDMA_DIR_WRITE:
- iowrite8_rep(cdns_xspi->sdmabase,
- cdns_xspi->out_buffer, sdma_size);
+ cdns_xspi_sdma_write(cdns_xspi, sdma_size);
break;
}
}
--
2.53.0