Re: [PATCH v6 1/1] dmaengine: switchtec-dma: Introduce Switchtec DMA engine PCI driver

From: Kelvin.Cao
Date: Wed Aug 02 2023 - 23:15:42 EST


On Wed, 2023-08-02 at 00:12 +0530, Vinod Koul wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you
> know the content is safe
>
> On 28-07-23, 13:03, Kelvin Cao wrote:
>
> > +static struct dma_async_tx_descriptor *
> > +switchtec_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dst,
> > +                       dma_addr_t dma_src, size_t len, unsigned
> > long flags)
> > +     __acquires(swdma_chan->submit_lock)
> > +{
> > +     if (len > SWITCHTEC_DESC_MAX_SIZE) {
> > +             /*
> > +              * Keep sparse happy by restoring an even lock count
> > on
> > +              * this lock.
> > +              */
> > +             __acquire(swdma_chan->submit_lock);
> > +             return NULL;
> > +     }
> > +
> > +     return switchtec_dma_prep_desc(c, MEMCPY,
> > SWITCHTEC_INVALID_HFID,
> > +                                    dma_dst,
> > SWITCHTEC_INVALID_HFID, dma_src,
> > +                                    0, len, flags);
> > +}
> > +
> > +static struct dma_async_tx_descriptor *
> > +switchtec_dma_prep_wimm_data(struct dma_chan *c, dma_addr_t
> > dma_dst, u64 data,
> > +                          unsigned long flags)
>
> can you please explain what this wimm data refers to...
>
> I think adding imm callback was a mistake, we need a better
> justification for another user for this, who programs this, what gets
> programmed here

Sure. I think it's an alternative method to prep_mem and would be more
convenient to use when the write is 8-byte and the data to be moved is
not in a DMA mapped memory location. For example, we write to a
doorbell register with the value from a local variable which is not
associated with a DMA address to notify the receiver to consume the
data, after confirming that the previously initiated DMA transactions
of the data have completed. I agree that the use scenario would be very
limited.
>
> > +     __acquires(swdma_chan->submit_lock)
> > +{
> > +     struct switchtec_dma_chan *swdma_chan =
> > to_switchtec_dma_chan(c);
> > +     struct device *chan_dev = to_chan_dev(swdma_chan);
> > +     size_t len = sizeof(data);
> > +
> > +     if (len == 8 && (dma_dst & ((1 << DMAENGINE_ALIGN_8_BYTES) -
> > 1))) {
> > +             dev_err(chan_dev,
> > +                     "QW WIMM dst addr 0x%08x_%08x not QW
> > aligned!\n",
> > +                     upper_32_bits(dma_dst),
> > lower_32_bits(dma_dst));
> > +             /*
> > +              * Keep sparse happy by restoring an even lock count
> > on
> > +              * this lock.
> > +              */
> > +             __acquire(swdma_chan->submit_lock);
> > +             return NULL;
> > +     }
> > +
> > +     return switchtec_dma_prep_desc(c, WIMM,
> > SWITCHTEC_INVALID_HFID, dma_dst,
> > +                                    SWITCHTEC_INVALID_HFID, 0,
> > data, len,
> > +                                    flags);
> > +}
> > +
>
> ...
>
> > +static int switchtec_dma_alloc_desc(struct switchtec_dma_chan
> > *swdma_chan)
> > +{
> > +     struct switchtec_dma_dev *swdma_dev = swdma_chan->swdma_dev;
> > +     struct chan_fw_regs __iomem *chan_fw = swdma_chan-
> > >mmio_chan_fw;
> > +     struct pci_dev *pdev;
> > +     struct switchtec_dma_desc *desc;
> > +     size_t size;
> > +     int rc;
> > +     int i;
> > +
> > +     swdma_chan->head = 0;
> > +     swdma_chan->tail = 0;
> > +     swdma_chan->cq_tail = 0;
> > +
> > +     size = SWITCHTEC_DMA_SQ_SIZE * sizeof(*swdma_chan->hw_sq);
> > +     swdma_chan->hw_sq = dma_alloc_coherent(swdma_dev-
> > >dma_dev.dev, size,
> > +                                            &swdma_chan-
> > >dma_addr_sq,
> > +                                            GFP_NOWAIT);
> > +     if (!swdma_chan->hw_sq) {
> > +             rc = -ENOMEM;
> > +             goto free_and_exit;
> > +     }
> > +
> > +     size = SWITCHTEC_DMA_CQ_SIZE * sizeof(*swdma_chan->hw_cq);
> > +     swdma_chan->hw_cq = dma_alloc_coherent(swdma_dev-
> > >dma_dev.dev, size,
> > +                                            &swdma_chan-
> > >dma_addr_cq,
> > +                                            GFP_NOWAIT);
> > +     if (!swdma_chan->hw_cq) {
> > +             rc = -ENOMEM;
> > +             goto free_and_exit;
> > +     }
> > +
> > +     /* reset host phase tag */
> > +     swdma_chan->phase_tag = 0;
> > +
> > +     size = sizeof(*swdma_chan->desc_ring);
> > +     swdma_chan->desc_ring = kcalloc(SWITCHTEC_DMA_RING_SIZE,
> > size,
> > +                                     GFP_NOWAIT);
> > +     if (!swdma_chan->desc_ring) {
> > +             rc = -ENOMEM;
> > +             goto free_and_exit;
> > +     }
> > +
> > +     for (i = 0; i < SWITCHTEC_DMA_RING_SIZE; i++) {
> > +             desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
> > +             if (!desc) {
> > +                     rc = -ENOMEM;
> > +                     goto free_and_exit;
> > +             }
> > +
> > +             dma_async_tx_descriptor_init(&desc->txd, &swdma_chan-
> > >dma_chan);
> > +             desc->txd.tx_submit = switchtec_dma_tx_submit;
> > +             desc->hw = &swdma_chan->hw_sq[i];
> > +             desc->completed = true;
> > +
> > +             swdma_chan->desc_ring[i] = desc;
> > +     }
> > +
> > +     rcu_read_lock();
> > +     pdev = rcu_dereference(swdma_dev->pdev);
> > +     if (!pdev) {
> > +             rcu_read_unlock();
> > +             rc = -ENODEV;
> > +             goto free_and_exit;
> > +     }
> > +
> > +     /* set sq/cq */
> > +     writel(lower_32_bits(swdma_chan->dma_addr_sq), &chan_fw-
> > >sq_base_lo);
> > +     writel(upper_32_bits(swdma_chan->dma_addr_sq), &chan_fw-
> > >sq_base_hi);
> > +     writel(lower_32_bits(swdma_chan->dma_addr_cq), &chan_fw-
> > >cq_base_lo);
> > +     writel(upper_32_bits(swdma_chan->dma_addr_cq), &chan_fw-
> > >cq_base_hi);
> > +
> > +     writew(SWITCHTEC_DMA_SQ_SIZE, &swdma_chan->mmio_chan_fw-
> > >sq_size);
> > +     writew(SWITCHTEC_DMA_CQ_SIZE, &swdma_chan->mmio_chan_fw-
> > >cq_size);
>
> what is write happening in the descriptor alloc callback, that does
> not
> sound correct to me

All the queue descriptors of a channel are pre-allocated, so I think
it's proper to convey the queue address/size to hardware at this point.
After this initialization, we only need to assign cookie in submit and
update queue head to hardware in issue_pending.

Kelvin