Hi,
On Fri, Apr 14, 2023 at 7:06 AM Vijaya Krishna Nivarthi
<quic_vnivarth@xxxxxxxxxxx> wrote:
@@ -14,7 +16,6 @@Drop unrelated whitespace change.
#include <linux/spi/spi.h>
#include <linux/spi/spi-mem.h>
-
@@ -108,6 +110,10 @@Looking at the above with a correctly configured editor (tab size=8)
#define RD_FIFO_RESET 0x0030
#define RESET_FIFO BIT(0)
+#define NEXT_DMA_DESC_ADDR 0x0040
+#define CURRENT_DMA_DESC_ADDR 0x0044
+#define CURRENT_MEM_ADDR 0x0048
the numbers don't line up. The first and 3rd have one too many tabs.
@@ -120,6 +126,27 @@ enum qspi_dir {Nothing uses QSPI_MAX_NUM_DESC. Drop it.
QSPI_WRITE,
};
+struct qspi_cmd_desc {
+ u32 data_address;
+ u32 next_descriptor;
+ u32 direction:1;
+ u32 multi_io_mode:3;
+ u32 reserved1:4;
+ u32 fragment:1;
+ u32 reserved2:7;
+ u32 length:16;
+ /*
+ * This marks end of HW cmd descriptor
+ * Fields down below are for SW usage to
+ * copy data from DMA buffer to rx buffer
+ */
+ u8 *bounce_src;
+ u8 *bounce_dst;
+ u32 bounce_length;
+};
+
+#define QSPI_MAX_NUM_DESC 5
@@ -137,11 +164,36 @@ enum qspi_clocks {Why bother with INVALID? It's either FIFO or DMA, right?
QSPI_NUM_CLKS
};
+enum qspi_xfer_mode {
+ QSPI_INVALID,
+ QSPI_FIFO,
+ QSPI_DMA
+};
+/* number of entries in sgt returned from spi framework that we can support */Is the above a hardware limitation, or just because you are statically
+#define QSPI_MAX_SG 5
allocating arrays? Please clarify in the comment. Would it make sense
to just dynamically allocate the arrays and remove the need for this
limitation?
+/* 3 descriptors for head, aligned part and tail */Instead of "int", shouldn't xfer_mode be "enum qspi_xfer_mode"?
+#define QSPI_NUM_CMD_DESC 3
+
+/* 2 descriptors for head, tail */
+#define QSPI_NUM_DAT_DESC 2
+
struct qcom_qspi {
void __iomem *base;
struct device *dev;
struct clk_bulk_data *clks;
struct qspi_xfer xfer;
+ struct dma_pool *dma_cmd_pool;
+ struct dma_pool *dma_data_pool;
+ dma_addr_t dma_cmd_desc[QSPI_NUM_CMD_DESC*QSPI_MAX_SG];
+ dma_addr_t dma_data_desc[QSPI_NUM_DAT_DESC*QSPI_MAX_SG];
+ void *virt_cmd_desc[QSPI_NUM_CMD_DESC*QSPI_MAX_SG];
+ void *virt_data_desc[QSPI_NUM_DAT_DESC*QSPI_MAX_SG];
+ unsigned int n_cmd_desc;
+ unsigned int n_data_desc;
+ int xfer_mode;
Although below I'm proposing that xfer_mode can just be completely
dropped from this structure.
@@ -151,18 +203,25 @@ struct qcom_qspi {Wouldn't it be easier to do the test once at the end? In other words,
static u32 qspi_buswidth_to_iomode(struct qcom_qspi *ctrl,
unsigned int buswidth)
{
+ u32 ret;
+
+ /* for DMA we don't write to PIO_XFER_CFG register, so don't shift */
switch (buswidth) {
case 1:
- return SDR_1BIT << MULTI_IO_MODE_SHFT;
+ ret = (ctrl->xfer_mode == QSPI_DMA ? SDR_1BIT : SDR_1BIT << MULTI_IO_MODE_SHFT);
+ break;
case 2:
- return SDR_2BIT << MULTI_IO_MODE_SHFT;
+ ret = (ctrl->xfer_mode == QSPI_DMA ? SDR_2BIT : SDR_2BIT << MULTI_IO_MODE_SHFT);
+ break;
case 4:
- return SDR_4BIT << MULTI_IO_MODE_SHFT;
+ ret = (ctrl->xfer_mode == QSPI_DMA ? SDR_4BIT : SDR_4BIT << MULTI_IO_MODE_SHFT);
+ break;
default:
dev_warn_once(ctrl->dev,
"Unexpected bus width: %u\n", buswidth);
- return SDR_1BIT << MULTI_IO_MODE_SHFT;
+ ret = (ctrl->xfer_mode == QSPI_DMA ? SDR_1BIT : SDR_1BIT << MULTI_IO_MODE_SHFT);
}
+ return ret;
in the switch statement "ret" never contains the shift and then at the
end:
if (ctrl->xfer_mode != QSPI_DMA)
ret <<= MULTI_IO_MODE_SHFT;
return ret;
...or, even better, just always return the unshifted mode and do the
shifting unconditionally in qcom_qspi_pio_xfer_cfg(). Then you never
need to look at xfer_mode to decide.
@@ -241,12 +316,16 @@ static int qcom_qspi_set_speed(struct qcom_qspi *ctrl, unsigned long speed_hz)The change to this function is completely a no-op, right? You check
return ret;
}
+ avg_bw_cpu = Bps_to_icc(speed_hz);
/*
- * Set BW quota for CPU as driver supports FIFO mode only.
- * We don't have explicit peak requirement so keep it equal to avg_bw.
+ * Set BW quota for CPU for FIFO to avg_bw
+ * as we don't have explicit peak requirement.
+ * TBD TBD TBD - change this as required for DMA.
+ * As of now same peak requirement seems to be working.
*/
- avg_bw_cpu = Bps_to_icc(speed_hz);
- ret = icc_set_bw(ctrl->icc_path_cpu_to_qspi, avg_bw_cpu, avg_bw_cpu);
+ peak_bw_cpu = ctrl->xfer_mode == QSPI_FIFO ? avg_bw_cpu : avg_bw_cpu;
+
+ ret = icc_set_bw(ctrl->icc_path_cpu_to_qspi, avg_bw_cpu, peak_bw_cpu);
the mode against QSPI_FIFO but you set the "peak_bw_cpu" to the same
thing in both modes. ...and the thing you set it to is exactly the
same as the function set it to before your patch.
...so drop all the changes you made to this function.
@@ -258,6 +337,190 @@ static int qcom_qspi_set_speed(struct qcom_qspi *ctrl, unsigned long speed_hz)Why is "n_bytes" "uint32_t" instead of just "u32"? Please just use
return 0;
}
+/* aligned to 32 bytes, not to cross 1KB boundary */
+#define QSPI_ALIGN_REQ 32
+#define QSPI_BOUNDARY_REQ 1024
+
+static int qcom_qspi_alloc_desc(struct qcom_qspi *ctrl, uint8_t *virt_ptr,
+ dma_addr_t dma_ptr, uint32_t n_bytes)
"u32" consistently in this file.
+ }Do we really need the above? Maybe we can drop it and (in a separate
+ }
+ return 0;
+
+cleanup:
+ dev_err(ctrl->dev, "ERROR cleanup in setup_dma_desc\n");
+ for (i = 0; i < ctrl->n_data_desc; i++)
+ dma_pool_free(ctrl->dma_data_pool, ctrl->virt_data_desc[i],
+ ctrl->dma_data_desc[i]);
+ ctrl->n_data_desc = 0;
+
+ for (i = 0; i < ctrl->n_cmd_desc; i++)
+ dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i],
+ ctrl->dma_cmd_desc[i]);
+ ctrl->n_cmd_desc = 0;
+ return ret;
+}
+
+static void qcom_qspi_dma_xfer(struct qcom_qspi *ctrl)
+{
+ /* Ack any previous interrupts that might be hanging around */
+ writel(DMA_CHAIN_DONE, ctrl->base + MSTR_INT_STATUS);
patch) drop the similar statement in qcom_qspi_pio_xfer()?
If this is truly needed for some reason, then it seems like in both
cases you should ack _all_ interrupts (the FIFO plus the DMA ones)
since we might be switching back and forth between the two modes and
thus any old interrupts that are "hanging around" could be from
either. ...but I think you can just drop it. If there are really any
interrupts "hanging around" we're in pretty bad shape.
+ /* Setup new interrupts */Why do you need this explicit wmb()? I'm fairly sure that this is
+ writel(DMA_CHAIN_DONE, ctrl->base + MSTR_INT_EN);
+
+ /* flush all writes */
+ wmb();
handled automatically by the fact that you're using writel() and not
writel_relaxed(). writel() documents that it is "ordered relative to
any prior Normal memory access" and certainly it's ordered relative to
IO writes to the same device.
+It feels like there's one too many casts here. Shouldn't this just be
+ /* kick off transfer */
+ writel((uint32_t)(uintptr_t)(ctrl->dma_cmd_desc)[0], ctrl->base + NEXT_DMA_DESC_ADDR);
"(u32)(ctrl->dma_cmd_desc[0])"?
+}No need for the "? true : false". Just:
+
+/* Switch to DMA if transfer length exceeds this */
+#define QSPI_MAX_BYTES_FIFO 64
+
+static bool qcom_qspi_can_dma(struct spi_controller *ctlr,
+ struct spi_device *slv, struct spi_transfer *xfer)
+{
+ return xfer->len > QSPI_MAX_BYTES_FIFO ? true : false;
return xfer->len > QSPI_MAX_BYTES_FIFO;
@@ -290,7 +555,25 @@ static int qcom_qspi_transfer_one(struct spi_master *master,Maybe it would be better to check if either "xfer->rx_sg.nents" or
ctrl->xfer.is_last = list_is_last(&xfer->transfer_list,
&master->cur_msg->transfers);
ctrl->xfer.rem_bytes = xfer->len;
- qcom_qspi_pio_xfer(ctrl);
+
+ if (qcom_qspi_can_dma(master, slv, xfer)) {
"xfer->tx_sg.nents" is non-zero. That indicates that the SPI framework
is expecting you to do DMA.
+ ctrl->xfer_mode = QSPI_DMA;Don't store iomode in the "ctrl" structure (remove it from that
+ ctrl->iomode = qspi_buswidth_to_iomode(ctrl, ctrl->xfer.buswidth);
struct). Just make it a local variable in qcom_qspi_setup_dma_desc()
and then pass it in to the one place that needs it:
qcom_qspi_alloc_desc()
+ mstr_cfg |= DMA_ENABLE;nit: I seem to remember IO writes to the controller taking a
+ writel(mstr_cfg, ctrl->base + MSTR_CONFIG);
non-trivial amount of time. Maybe worth it to do?
if (!(mstr_cfg & DMA_ENABLE)) {
mstr_cfg |= DMA_ENABLE;
writel(mstr_cfg, ctrl->base + MSTR_CONFIG);
}
...similar for the "else" case below.
@@ -328,6 +611,40 @@ static int qcom_qspi_prepare_message(struct spi_master *master,Instead of dma_pool_create(), use dmam_pool_create(). That looks to be
return 0;
}
+static int qcom_qspi_alloc_dma(struct qcom_qspi *ctrl)
+{
+ /* allocate for cmd descriptors pool */
+ ctrl->dma_cmd_pool = dma_pool_create("qspi cmd desc pool",
+ ctrl->dev, sizeof(struct qspi_cmd_desc), 0, 0);
the (oddly named) devm version of the function. Then you can fully get
rid of qcom_qspi_free_dma() and also the dma_pool_destroy() in your
error handling below.
It also seems really weird that the "data" pool has such strict
alignment requirements and you do a whole ton of work to meet those
requirements, but the "cmd" pool has no alignment requirements at all.
Is this really correct?
+ if (!ctrl->dma_cmd_pool) {nit: no need for an error message here. You can assume that allocation
+ dev_err(ctrl->dev, "Could not create dma pool for cmd_desc\n");
failures will already print a warning splat and you don't need another
one for this incredibly unlikely event.
@@ -426,27 +743,63 @@ static irqreturn_t qcom_qspi_irq(int irq, void *dev_id)I don't think you really need to check xfer_mode here, do you? If
int_status = readl(ctrl->base + MSTR_INT_STATUS);
writel(int_status, ctrl->base + MSTR_INT_STATUS);
- if (ctrl->xfer.dir == QSPI_WRITE) {
- if (int_status & WR_FIFO_EMPTY)
- ret = pio_write(ctrl);
- } else {
- if (int_status & RESP_FIFO_RDY)
- ret = pio_read(ctrl);
- }
-
- if (int_status & QSPI_ERR_IRQS) {
- if (int_status & RESP_FIFO_UNDERRUN)
- dev_err(ctrl->dev, "IRQ error: FIFO underrun\n");
- if (int_status & WR_FIFO_OVERRUN)
- dev_err(ctrl->dev, "IRQ error: FIFO overrun\n");
- if (int_status & HRESP_FROM_NOC_ERR)
- dev_err(ctrl->dev, "IRQ error: NOC response error\n");
- ret = IRQ_HANDLED;
- }
-
- if (!ctrl->xfer.rem_bytes) {
- writel(0, ctrl->base + MSTR_INT_EN);
- spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
+ switch (ctrl->xfer_mode) {
+ case QSPI_FIFO:
xfer_mode is FIFO then only the FIFO-related interrupts are enabled.
If xfer_mode is DMA then only the DMA-related interrupts are enabled.
In fact, I think you can fully get rid of the "xfer_mode" structure
member completely. It's completely redundant.
@@ -487,6 +840,9 @@ static int qcom_qspi_probe(struct platform_device *pdev)Get rid of this initialization. Above I'm suggesting getting rid of
if (ret)
return ret;
+ /* set default mode to FIFO */
+ ctrl->xfer_mode = QSPI_FIFO;
"xfter" mode altogether, but in any case, we should be setting this
before each transfer so the initialization doesn't do anything useful.
@@ -556,10 +923,15 @@ static int qcom_qspi_probe(struct platform_device *pdev)To make this the reverse order of probe the qcom_qspi_free_dma() call
static void qcom_qspi_remove(struct platform_device *pdev)
{
struct spi_master *master = platform_get_drvdata(pdev);
+ struct qcom_qspi *ctrl;
+
+ ctrl = spi_master_get_devdata(master);
/* Unregister _before_ disabling pm_runtime() so we stop transfers */
spi_unregister_master(master);
+ qcom_qspi_free_dma(ctrl);
+
pm_runtime_disable(&pdev->dev);
should be _after_ the pm_runtime_disable(), although above I'm
suggesting fully getting rid of qcom_qspi_free_dma() so maybe the
point is moot.