From: Nishad Saraf <nishads@xxxxxxx>
Adds driver to enable PCIe board which uses AMD QDMA (the Queue-based
Direct Memory Access) subsystem. For example, Xilinx Alveo V70 AI
Accelerator devices.
https://www.xilinx.com/applications/data-center/v70.html
The QDMA subsystem is used in conjunction with the PCI Express IP block
to provide high performance data transfer between host memory and the
card's DMA subsystem.
+-------+ +-------+ +-----------+
PCIe | | | | | |
Tx/Rx | | | | AXI | |
<=======> | PCIE | <===> | QDMA | <====>| User Logic|
| | | | | |
+-------+ +-------+ +-----------+
The primary mechanism to transfer data using the QDMA is for the QDMA
engine to operate on instructions (descriptors) provided by the host
operating system. Using the descriptors, the QDMA can move data in both
the Host to Card (H2C) direction, or the Card to Host (C2H) direction.
The QDMA provides a per-queue basis option whether DMA traffic goes
to an AXI4 memory map (MM) interface or to an AXI4-Stream interface.
The hardware detail is provided by
https://docs.xilinx.com/r/en-US/pg302-qdma
Implements dmaengine APIs to support MM DMA transfers.
- probe the available DMA channels
- use dma_slave_map for channel lookup
- use virtual channel to manage dmaengine tx descriptors
- implement device_prep_slave_sg callback to handle host scatter gather
list
- implement descriptor metadata operations to set device address for DMA
transfer
Signed-off-by: Nishad Saraf <nishads@xxxxxxx>
Signed-off-by: Lizhi Hou <lizhi.hou@xxxxxxx>
---
+static void qdma_free_qintr_rings(struct qdma_device *qdev)
+{
+ int i;
+
+ for (i = 0; i < qdev->qintr_ring_num; i++) {
+ if (!qdev->qintr_rings[i].base)
+ continue;
+
+ dma_free_coherent(&qdev->pdev->dev, QDMA_INTR_RING_SIZE,
+ qdev->qintr_rings[i].base,
+ qdev->qintr_rings[i].dev_base);
+ }
+}
+
+static int qdma_alloc_qintr_rings(struct qdma_device *qdev)
+{
+ u32 ctxt[QDMA_CTXT_REGMAP_LEN];
+ struct device *dev = &qdev->pdev->dev;
+ struct qdma_intr_ring *ring;
+ struct qdma_ctxt_intr intr_ctxt;
+ u32 vector;
+ int ret, i;
+
+ qdev->qintr_ring_num = qdev->queue_irq_num;
+ qdev->qintr_rings = devm_kcalloc(dev, qdev->qintr_ring_num,
+ sizeof(*qdev->qintr_rings),
+ GFP_KERNEL);
+ if (!qdev->qintr_rings)
+ return -ENOMEM;
+
+ vector = qdev->queue_irq_start;
+ for (i = 0; i < qdev->qintr_ring_num; i++, vector++) {
+ ring = &qdev->qintr_rings[i];
+ ring->qdev = qdev;
+ ring->msix_id = qdev->err_irq_idx + i + 1;
+ ring->ridx = i;
+ ring->color = 1;
+ ring->base = dma_alloc_coherent(dev, QDMA_INTR_RING_SIZE,
+ &ring->dev_base,
+ GFP_KERNEL);
+ if (!ring->base) {
+ qdma_err(qdev, "Failed to alloc intr ring %d", i);
+ ret = -ENOMEM;
+ goto failed;
+ }
+ intr_ctxt.agg_base = QDMA_INTR_RING_BASE(ring->dev_base);
+ intr_ctxt.size = (QDMA_INTR_RING_SIZE - 1) / 4096;
+ intr_ctxt.vec = ring->msix_id;
+ intr_ctxt.valid = true;
+ intr_ctxt.color = true;
+ ret = qdma_prog_context(qdev, QDMA_CTXT_INTR_COAL,
+ QDMA_CTXT_CLEAR, ring->ridx, NULL);
+ if (ret) {
+ qdma_err(qdev, "Failed clear intr ctx, ret %d", ret);
+ goto failed;
+ }
+
+ qdma_prep_intr_context(qdev, &intr_ctxt, ctxt);
+ ret = qdma_prog_context(qdev, QDMA_CTXT_INTR_COAL,
+ QDMA_CTXT_WRITE, ring->ridx, ctxt);
+ if (ret) {
+ qdma_err(qdev, "Failed setup intr ctx, ret %d", ret);
+ goto failed;
+ }
+
+ ret = devm_request_threaded_irq(dev, vector, NULL,
+ qdma_queue_isr, IRQF_ONESHOT,
+ "amd-qdma-queue", ring);
+ if (ret) {
+ qdma_err(qdev, "Failed to request irq %d", vector);
+ goto failed;
+ }
+ }
+
+ return 0;
+
+failed:
+ qdma_free_qintr_rings(qdev);
+ return ret;
+}