Re: [PATCH RFC v2 6/8] spi: axi-spi-engine: add offload support

From: Nuno Sá
Date: Tue May 21 2024 - 08:27:49 EST


On Fri, 2024-05-10 at 19:44 -0500, David Lechner wrote:
> This implements SPI offload support for the AXI SPI Engine. Currently,
> the hardware only supports triggering offload transfers with a hardware
> trigger so attempting to use an offload message in the regular SPI
> message queue will fail. Also, only allows streaming rx data to an
> external sink, so attempts to use a rx_buf in the offload message will
> fail.
>
> Signed-off-by: David Lechner <dlechner@xxxxxxxxxxxx>
> ---
>
> v2 changes:
>
> This patch has been reworked to accommodate the changes described in all
> of the other patches.
> ---
>  drivers/spi/spi-axi-spi-engine.c | 267
> ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 264 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-
> engine.c
> index e358ac5b4509..95327df572a0 100644
> --- a/drivers/spi/spi-axi-spi-engine.c
> +++ b/drivers/spi/spi-axi-spi-engine.c
> @@ -2,6 +2,7 @@
>  /*
>   * SPI-Engine SPI controller driver
>   * Copyright 2015 Analog Devices Inc.
> + * Copyright 2024 BayLibre, SAS
>   *  Author: Lars-Peter Clausen <lars@xxxxxxxxxx>
>   */
>  
> @@ -16,6 +17,7 @@
>  #include <linux/platform_device.h>
>  #include <linux/spi/spi.h>
>  
> +#define SPI_ENGINE_REG_OFFLOAD_MEM_ADDR_WIDTH 0x10
>  #define SPI_ENGINE_REG_RESET 0x40
>  
>  #define SPI_ENGINE_REG_INT_ENABLE 0x80
> @@ -23,6 +25,7 @@
>  #define SPI_ENGINE_REG_INT_SOURCE 0x88
>  
>  #define SPI_ENGINE_REG_SYNC_ID 0xc0
> +#define SPI_ENGINE_REG_OFFLOAD_SYNC_ID 0xc4
>  
>  #define SPI_ENGINE_REG_CMD_FIFO_ROOM 0xd0
>  #define SPI_ENGINE_REG_SDO_FIFO_ROOM 0xd4
> @@ -33,10 +36,24 @@
>  #define SPI_ENGINE_REG_SDI_DATA_FIFO 0xe8
>  #define SPI_ENGINE_REG_SDI_DATA_FIFO_PEEK 0xec
>  
> +#define SPI_ENGINE_MAX_NUM_OFFLOADS 32
> +
> +#define SPI_ENGINE_REG_OFFLOAD_CTRL(x) (0x100 +
> SPI_ENGINE_MAX_NUM_OFFLOADS * (x))
> +#define SPI_ENGINE_REG_OFFLOAD_STATUS(x) (0x104 +
> SPI_ENGINE_MAX_NUM_OFFLOADS * (x))
> +#define SPI_ENGINE_REG_OFFLOAD_RESET(x) (0x108 +
> SPI_ENGINE_MAX_NUM_OFFLOADS * (x))
> +#define SPI_ENGINE_REG_OFFLOAD_CMD_FIFO(x) (0x110 +
> SPI_ENGINE_MAX_NUM_OFFLOADS * (x))
> +#define SPI_ENGINE_REG_OFFLOAD_SDO_FIFO(x) (0x114 +
> SPI_ENGINE_MAX_NUM_OFFLOADS * (x))
> +
> +#define SPI_ENGINE_SPI_OFFLOAD_MEM_WIDTH_SDO GENMASK(15, 8)
> +#define SPI_ENGINE_SPI_OFFLOAD_MEM_WIDTH_CMD GENMASK(7, 0)
> +
>  #define SPI_ENGINE_INT_CMD_ALMOST_EMPTY BIT(0)
>  #define SPI_ENGINE_INT_SDO_ALMOST_EMPTY BIT(1)
>  #define SPI_ENGINE_INT_SDI_ALMOST_FULL BIT(2)
>  #define SPI_ENGINE_INT_SYNC BIT(3)
> +#define SPI_ENGINE_INT_OFFLOAD_SYNC BIT(4)
> +
> +#define SPI_ENGINE_OFFLOAD_CTRL_ENABLE BIT(0)
>  
>  #define SPI_ENGINE_CONFIG_CPHA BIT(0)
>  #define SPI_ENGINE_CONFIG_CPOL BIT(1)
> @@ -74,6 +91,10 @@
>  #define SPI_ENGINE_CMD_SYNC(id) \
>   SPI_ENGINE_CMD(SPI_ENGINE_INST_MISC, SPI_ENGINE_MISC_SYNC, (id))
>  
> +/* default sizes - can be changed when SPI Engine firmware is compiled */
> +#define SPI_ENGINE_OFFLOAD_CMD_FIFO_SIZE 16
> +#define SPI_ENGINE_OFFLOAD_SDO_FIFO_SIZE 16
> +
>  struct spi_engine_program {
>   unsigned int length;
>   uint16_t instructions[] __counted_by(length);
> @@ -101,6 +122,12 @@ struct spi_engine_message_state {
>   uint8_t *rx_buf;
>  };
>  
> +struct spi_engine_offload {
> + struct spi_device *spi;
> + unsigned int id;
> + bool prepared;
> +};
> +
>  struct spi_engine {
>   struct clk *clk;
>   struct clk *ref_clk;
> @@ -111,6 +138,10 @@ struct spi_engine {
>   struct spi_engine_message_state msg_state;
>   struct completion msg_complete;
>   unsigned int int_enable;
> +
> + unsigned int offload_ctrl_mem_size;
> + unsigned int offload_sdo_mem_size;
> + struct spi_engine_offload offload_priv[SPI_ENGINE_MAX_NUM_OFFLOADS];
>  };
>  
>  static void spi_engine_program_add_cmd(struct spi_engine_program *p,
> @@ -154,7 +185,7 @@ static void spi_engine_gen_xfer(struct spi_engine_program
> *p, bool dry,
>  
>   if (xfer->tx_buf)
>   flags |= SPI_ENGINE_TRANSFER_WRITE;
> - if (xfer->rx_buf)
> + if (xfer->rx_buf || (xfer->offload_flags &
> SPI_OFFLOAD_XFER_RX_STREAM))
>   flags |= SPI_ENGINE_TRANSFER_READ;
>  
>   spi_engine_program_add_cmd(p, dry,
> @@ -202,16 +233,24 @@ static void spi_engine_gen_cs(struct spi_engine_program
> *p, bool dry,
>   *
>   * NB: This is separate from spi_engine_compile_message() because the latter
>   * is called twice and would otherwise result in double-evaluation.
> + *
> + * Returns 0 on success, -EINVAL on failure.
>   */
> -static void spi_engine_precompile_message(struct spi_message *msg)
> +static int spi_engine_precompile_message(struct spi_message *msg)
>  {
>   unsigned int clk_div, max_hz = msg->spi->controller->max_speed_hz;
>   struct spi_transfer *xfer;
>  
>   list_for_each_entry(xfer, &msg->transfers, transfer_list) {
> + /* If we have an offload transfer, we can't rx to buffer */
> + if (msg->offload && xfer->rx_buf)
> + return -EINVAL;
> +
>   clk_div = DIV_ROUND_UP(max_hz, xfer->speed_hz);
>   xfer->effective_speed_hz = max_hz / min(clk_div, 256U);
>   }
> +
> + return 0;
>  }
>  
>  static void spi_engine_compile_message(struct spi_message *msg, bool dry,
> @@ -503,8 +542,11 @@ static irqreturn_t spi_engine_irq(int irq, void *devid)
>  static int spi_engine_optimize_message(struct spi_message *msg)
>  {
>   struct spi_engine_program p_dry, *p;
> + int ret;
>  
> - spi_engine_precompile_message(msg);
> + ret = spi_engine_precompile_message(msg);
> + if (ret)
> + return ret;
>  
>   p_dry.length = 0;
>   spi_engine_compile_message(msg, true, &p_dry);
> @@ -539,6 +581,11 @@ static int spi_engine_transfer_one_message(struct
> spi_controller *host,
>   unsigned int int_enable = 0;
>   unsigned long flags;
>  
> + if (msg->offload) {
> + dev_err(&host->dev, "Single transfer offload not
> supported\n");
> + return -EOPNOTSUPP;
> + }
> +
>   /* reinitialize message state for this transfer */
>   memset(st, 0, sizeof(*st));
>   st->cmd_buf = p->instructions;
> @@ -579,6 +626,204 @@ static int spi_engine_transfer_one_message(struct
> spi_controller *host,
>   return msg->status;
>  }
>  
> +static struct spi_engine_offload *spi_engine_get_offload(struct spi_device
> *spi,
> + unsigned int id,
> + unsigned int
> *offload_num)
> +{
> + struct spi_controller *host = spi->controller;
> + struct spi_engine *spi_engine = spi_controller_get_devdata(host);
> + struct spi_engine_offload *priv;
> + int i;
> +
> + for (i = 0; i < SPI_ENGINE_MAX_NUM_OFFLOADS; i++) {
> + priv = &spi_engine->offload_priv[i];
> +
> + if (priv->spi == spi && priv->id == id) {
> + *offload_num = i;
> + return priv;
> + }
> + }
> +
> + return ERR_PTR(-ENODEV);
> +}
> +
> +static int spi_engine_offload_map_channel(struct spi_device *spi,
> +   unsigned int id,
> +   unsigned int channel)
> +{
> + struct spi_controller *host = spi->controller;
> + struct spi_engine *spi_engine = spi_controller_get_devdata(host);
> + struct spi_engine_offload *priv;
> +
> + if (channel >= SPI_ENGINE_MAX_NUM_OFFLOADS)
> + return -EINVAL;
> +
> + priv = &spi_engine->offload_priv[channel];
> +
> + if (priv->spi)
> + return -EBUSY;

I wonder if we need to be this strict? Is there any problem with having two
devices requesting the same offload engine? I would expect multiple peripherals
trying to actually use it at the same time (with the prepare() callback) to be
problematic, but if they play along it could actually work, right? In reality
that may never be a realistic use case, so this is likely fine.

> +
> + priv->spi = spi;
> + priv->id = id;
> +
> + return 0;
> +}
> +
> +static int spi_engine_offload_prepare(struct spi_device *spi, unsigned int
> id,
> +       struct spi_message *msg)
> +{
> + struct spi_controller *host = spi->controller;
> + struct spi_engine *spi_engine = spi_controller_get_devdata(host);
> + struct spi_engine_program *p = msg->opt_state;
> + struct spi_engine_offload *priv;
> + struct spi_transfer *xfer;
> + void __iomem *cmd_addr;
> + void __iomem *sdo_addr;
> + size_t tx_word_count = 0;
> + unsigned int offload_num, i;
> +
> + priv = spi_engine_get_offload(spi, id, &offload_num);
> + if (IS_ERR(priv))
> + return PTR_ERR(priv);
> +
> + if (priv->prepared)
> + return -EBUSY;
> +
> + if (p->length > spi_engine->offload_ctrl_mem_size)
> + return -EINVAL;
> +
> + /* count total number of tx words in message */
> + list_for_each_entry(xfer, &msg->transfers, transfer_list) {
> + if (!xfer->tx_buf)
> + continue;
> +
> + if (xfer->bits_per_word <= 8)
> + tx_word_count += xfer->len;
> + else if (xfer->bits_per_word <= 16)
> + tx_word_count += xfer->len / 2;
> + else
> + tx_word_count += xfer->len / 4;
> + }
> +
> + if (tx_word_count > spi_engine->offload_sdo_mem_size)
> + return -EINVAL;
> +
> + cmd_addr = spi_engine->base + SPI_ENGINE_REG_OFFLOAD_CMD_FIFO(priv-
> >id);
> + sdo_addr = spi_engine->base +
> SPI_ENGINE_REG_OFFLOAD_SDO_FIFO(offload_num);
> +
> + list_for_each_entry(xfer, &msg->transfers, transfer_list) {
> + if (!xfer->tx_buf)
> + continue;
> +
> + if (xfer->bits_per_word <= 8) {
> + const u8 *buf = xfer->tx_buf;
> +
> + for (i = 0; i < xfer->len; i++)
> + writel_relaxed(buf[i], sdo_addr);
> + } else if (xfer->bits_per_word <= 16) {
> + const u16 *buf = xfer->tx_buf;
> +
> + for (i = 0; i < xfer->len / 2; i++)
> + writel_relaxed(buf[i], sdo_addr);
> + } else {
> + const u32 *buf = xfer->tx_buf;
> +
> + for (i = 0; i < xfer->len / 4; i++)
> + writel_relaxed(buf[i], sdo_addr);
> + }
> + }
> +
> + for (i = 0; i < p->length; i++)
> + writel_relaxed(p->instructions[i], cmd_addr);
> +
> + msg->offload_state = (void *)(intptr_t)offload_num;
> + priv->prepared = true;
> +
> + return 0;
> +}
> +
> +static void spi_engine_offload_unprepare(struct spi_device *spi, unsigned int
> id)
> +{
> + struct spi_controller *host = spi->controller;
> + struct spi_engine *spi_engine = spi_controller_get_devdata(host);
> + struct spi_engine_offload *priv;
> + unsigned int offload_num;
> +
> + priv = spi_engine_get_offload(spi, id, &offload_num);
> + if (IS_ERR(priv)) {
> + dev_warn(&spi->dev, "failed match offload in unprepare\n");
> + return;
> + }
> +
> + writel_relaxed(1, spi_engine->base +
> SPI_ENGINE_REG_OFFLOAD_RESET(offload_num));
> + writel_relaxed(0, spi_engine->base +
> SPI_ENGINE_REG_OFFLOAD_RESET(offload_num));
> +
> + priv->prepared = false;
> +}
> +
> +static int spi_engine_offload_enable(struct spi_device *spi, unsigned int id)
> +{
> + struct spi_controller *host = spi->controller;
> + struct spi_engine *spi_engine = spi_controller_get_devdata(host);
> + struct spi_engine_offload *priv;
> + unsigned int offload_num, reg;
> +
> + priv = spi_engine_get_offload(spi, id, &offload_num);
> + if (IS_ERR(priv))
> + return PTR_ERR(priv);
> +
> + reg = readl_relaxed(spi_engine->base +
> +     SPI_ENGINE_REG_OFFLOAD_CTRL(offload_num));
> + reg |= SPI_ENGINE_OFFLOAD_CTRL_ENABLE;
> + writel_relaxed(reg, spi_engine->base +
> +     SPI_ENGINE_REG_OFFLOAD_CTRL(offload_num));
> +
> + return 0;
> +}
> +
> +static void spi_engine_offload_disable(struct spi_device *spi, unsigned int
> id)
> +{
> + struct spi_controller *host = spi->controller;
> + struct spi_engine *spi_engine = spi_controller_get_devdata(host);
> + struct spi_engine_offload *priv;
> + unsigned int offload_num, reg;
> +
> + priv = spi_engine_get_offload(spi, id, &offload_num);
> + if (IS_ERR(priv)) {
> + dev_warn(&spi->dev, "failed match offload in disable\n");
> + return;
> + }
> +
> + reg = readl_relaxed(spi_engine->base +
> +     SPI_ENGINE_REG_OFFLOAD_CTRL(offload_num));
> + reg &= ~SPI_ENGINE_OFFLOAD_CTRL_ENABLE;
> + writel_relaxed(reg, spi_engine->base +
> +     SPI_ENGINE_REG_OFFLOAD_CTRL(offload_num));
> +}
> +
> +static const struct spi_controller_offload_ops spi_engine_offload_ops = {
> + .map_channel = spi_engine_offload_map_channel,
> + .prepare = spi_engine_offload_prepare,
> + .unprepare = spi_engine_offload_unprepare,
> + .hw_trigger_enable = spi_engine_offload_enable,
> + .hw_trigger_disable = spi_engine_offload_disable,

I guess this is what you and Conor are already discussing in some form, but I
would expect this to be the actual offload trigger that plays a SPI transfer. As
it stands, it looks weird (or confusing) to have the enable/disable of the engine
act as a trigger... Maybe these callbacks could be used to enable/disable the
actual trigger of the offload engine (in our current cases, the PWM)? That would
make it easy to move the trigger DT property to where it belongs. The DMA one
(given its tight relation with IIO DMA buffers) is another (way more difficult)
story, I think.

- Nuno Sá