Re: [PATCH] usb: cdns3: Coherent memory allocation optimization

From: Peter Chen
Date: Fri Mar 05 2021 - 20:39:05 EST


On 21-03-05 17:01:11, Sanket Parmar wrote:
> Allocation of DMA coherent memory in atomic context using
> dma_alloc_coherent() might fail on some platform. To fix it,
> Replaced dma_alloc_coherent() with dma_pool API to allocate a
> smaller chunk of DMA coherent memory for TRB rings.
>
> Also in cdns3_prepare_aligned_request_buf(), replaced
> dma_alloc_coherent() with kmalloc and dma_map API to allocate
> aligned request buffer of dynamic length.
>

You do two changes in one commit, would you please split this one as
two patches?

> Fixes: commit 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")

"commit" is not needed

> Reported by: Aswath Govindraju <a-govindraju@xxxxxx>

Reported-by:

> Signed-off-by: Sanket Parmar <sparmar@xxxxxxxxxxx>
> ---
> drivers/usb/cdns3/cdns3-gadget.c | 115 ++++++++++++++++++++++---------------
> drivers/usb/cdns3/cdns3-gadget.h | 3 +
> 2 files changed, 71 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c
> index 582bfec..5fd6993 100644
> --- a/drivers/usb/cdns3/cdns3-gadget.c
> +++ b/drivers/usb/cdns3/cdns3-gadget.c
> @@ -59,6 +59,7 @@
> #include <linux/dma-mapping.h>
> #include <linux/usb/gadget.h>
> #include <linux/module.h>
> +#include <linux/dmapool.h>
> #include <linux/iopoll.h>
>
> #include "core.h"
> @@ -190,29 +191,13 @@ dma_addr_t cdns3_trb_virt_to_dma(struct cdns3_endpoint *priv_ep,
> return priv_ep->trb_pool_dma + offset;
> }
>
> -static int cdns3_ring_size(struct cdns3_endpoint *priv_ep)
> -{
> - switch (priv_ep->type) {
> - case USB_ENDPOINT_XFER_ISOC:
> - return TRB_ISO_RING_SIZE;
> - case USB_ENDPOINT_XFER_CONTROL:
> - return TRB_CTRL_RING_SIZE;
> - default:
> - if (priv_ep->use_streams)
> - return TRB_STREAM_RING_SIZE;
> - else
> - return TRB_RING_SIZE;
> - }
> -}
> -
> static void cdns3_free_trb_pool(struct cdns3_endpoint *priv_ep)
> {
> struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
>
> if (priv_ep->trb_pool) {
> - dma_free_coherent(priv_dev->sysdev,
> - cdns3_ring_size(priv_ep),
> - priv_ep->trb_pool, priv_ep->trb_pool_dma);
> + dma_pool_free(priv_dev->eps_dma_pool,
> + priv_ep->trb_pool, priv_ep->trb_pool_dma);
> priv_ep->trb_pool = NULL;
> }
> }
> @@ -226,7 +211,7 @@ static void cdns3_free_trb_pool(struct cdns3_endpoint *priv_ep)
> int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep)
> {
> struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
> - int ring_size = cdns3_ring_size(priv_ep);
> + int ring_size = TRB_RING_SIZE;

You will use the same size for TRB ring region for control/bulk/isoc
endpoint.

> int num_trbs = ring_size / TRB_SIZE;
> struct cdns3_trb *link_trb;
>
> @@ -234,10 +219,10 @@ int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep)
> cdns3_free_trb_pool(priv_ep);
>
> if (!priv_ep->trb_pool) {
> - priv_ep->trb_pool = dma_alloc_coherent(priv_dev->sysdev,
> - ring_size,
> - &priv_ep->trb_pool_dma,
> - GFP_DMA32 | GFP_ATOMIC);
> + priv_ep->trb_pool = dma_pool_alloc(priv_dev->eps_dma_pool,
> + GFP_DMA32 | GFP_ATOMIC,
> + &priv_ep->trb_pool_dma);

dma_pool_alloc also allocates the whole thunk of TRB region. See the
kernel-doc for dma_pool_create.

* Given one of these pools, dma_pool_alloc()
* may be used to allocate memory. Such memory will all have "consistent"
> +
> if (!priv_ep->trb_pool)
> return -ENOMEM;
>
> @@ -833,10 +818,26 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep,
> usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request,
> priv_ep->dir);
>
> - if ((priv_req->flags & REQUEST_UNALIGNED) &&
> - priv_ep->dir == USB_DIR_OUT && !request->status)
> - memcpy(request->buf, priv_req->aligned_buf->buf,
> - request->length);
> + if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req->aligned_buf) {
> + struct cdns3_aligned_buf *buf;
> +
> + buf = priv_req->aligned_buf;
> + dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> + buf->dir);
> + priv_req->flags &= ~REQUEST_UNALIGNED;
> +
> + if (priv_ep->dir == USB_DIR_OUT && !request->status) {
> + memcpy(request->buf, priv_req->aligned_buf->buf,
> + request->length);
> + }
> +
> + trace_cdns3_free_aligned_request(priv_req);
> + priv_req->aligned_buf->in_use = 0;
> + queue_work(system_freezable_wq,
> + &priv_dev->aligned_buf_wq);
> + priv_req->aligned_buf = NULL;
> +
> + }
>
> priv_req->flags &= ~(REQUEST_PENDING | REQUEST_UNALIGNED);
> /* All TRBs have finished, clear the counter */
> @@ -898,8 +899,7 @@ static void cdns3_free_aligned_request_buf(struct work_struct *work)
> * interrupts.
> */
> spin_unlock_irqrestore(&priv_dev->lock, flags);
> - dma_free_coherent(priv_dev->sysdev, buf->size,
> - buf->buf, buf->dma);
> + kfree(buf->buf);
> kfree(buf);
> spin_lock_irqsave(&priv_dev->lock, flags);
> }
> @@ -925,27 +925,16 @@ static int cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
> if (!buf)
> return -ENOMEM;
>
> - buf->size = priv_req->request.length;
> + buf->size = usb_endpoint_dir_out(priv_ep->endpoint.desc) ?
> + usb_ep_align(&(priv_ep->endpoint), priv_req->request.length)
> + : priv_req->request.length;
>
> - buf->buf = dma_alloc_coherent(priv_dev->sysdev,
> - buf->size,
> - &buf->dma,
> - GFP_ATOMIC);
> + buf->buf = kmalloc(buf->size, GFP_ATOMIC);
> if (!buf->buf) {
> kfree(buf);
> return -ENOMEM;
> }
>
> - if (priv_req->aligned_buf) {
> - trace_cdns3_free_aligned_request(priv_req);
> - priv_req->aligned_buf->in_use = 0;
> - queue_work(system_freezable_wq,
> - &priv_dev->aligned_buf_wq);
> - }
> -
> - buf->in_use = 1;
> - priv_req->aligned_buf = buf;
> -
> list_add_tail(&buf->list,
> &priv_dev->aligned_buf_list);
> }
> @@ -955,6 +944,27 @@ static int cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
> priv_req->request.length);
> }
>
> + if (priv_req->aligned_buf) {
> + trace_cdns3_free_aligned_request(priv_req);
> + priv_req->aligned_buf->in_use = 0;
> + queue_work(system_freezable_wq,
> + &priv_dev->aligned_buf_wq);
> + }
> +
> + buf->dir = priv_ep->dir ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
> + buf->in_use = 1;
> + priv_req->aligned_buf = buf;
> +
> + buf->dma = dma_map_single(priv_dev->sysdev, buf->buf, buf->size,
> + buf->dir);
> +
> + if (dma_mapping_error(priv_dev->sysdev, buf->dma)) {
> + dev_err(priv_dev->dev, "Failed to map buffer\n");
> + kfree(buf->buf);
> + kfree(buf);
> + return -EFAULT;
> + }
> +
> priv_req->flags |= REQUEST_UNALIGNED;
> trace_cdns3_prepare_aligned_request(priv_req);
>
> @@ -3103,16 +3113,17 @@ static void cdns3_gadget_exit(struct cdns *cdns)
> struct cdns3_aligned_buf *buf;
>
> buf = cdns3_next_align_buf(&priv_dev->aligned_buf_list);
> - dma_free_coherent(priv_dev->sysdev, buf->size,
> - buf->buf,
> - buf->dma);
> + dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> + buf->dir);
>
> list_del(&buf->list);
> + kfree(buf->buf);
> kfree(buf);
> }
>
> dma_free_coherent(priv_dev->sysdev, 8, priv_dev->setup_buf,
> priv_dev->setup_dma);
> + dma_pool_destroy(priv_dev->eps_dma_pool);
>
> kfree(priv_dev->zlp_buf);
> usb_put_gadget(&priv_dev->gadget);
> @@ -3185,6 +3196,14 @@ static int cdns3_gadget_start(struct cdns *cdns)
> /* initialize endpoint container */
> INIT_LIST_HEAD(&priv_dev->gadget.ep_list);
> INIT_LIST_HEAD(&priv_dev->aligned_buf_list);
> + priv_dev->eps_dma_pool = dma_pool_create("cdns3_eps_dma_pool",
> + priv_dev->sysdev,
> + TRB_RING_SIZE, 8, 0);
> + if (!priv_dev->eps_dma_pool) {
> + dev_err(priv_dev->dev, "Failed to create TRB dma pool\n");
> + ret = -ENOMEM;
> + goto err1;
> + }
>
> ret = cdns3_init_eps(priv_dev);
> if (ret) {
> @@ -3235,6 +3254,8 @@ static int cdns3_gadget_start(struct cdns *cdns)
> err2:
> cdns3_free_all_eps(priv_dev);
> err1:
> + dma_pool_destroy(priv_dev->eps_dma_pool);
> +
> usb_put_gadget(&priv_dev->gadget);
> cdns->gadget_dev = NULL;
> return ret;
> diff --git a/drivers/usb/cdns3/cdns3-gadget.h b/drivers/usb/cdns3/cdns3-gadget.h
> index 21fa461..c5660f2 100644
> --- a/drivers/usb/cdns3/cdns3-gadget.h
> +++ b/drivers/usb/cdns3/cdns3-gadget.h
> @@ -12,6 +12,7 @@
> #ifndef __LINUX_CDNS3_GADGET
> #define __LINUX_CDNS3_GADGET
> #include <linux/usb/gadget.h>
> +#include <linux/dma-direction.h>
>
> /*
> * USBSS-DEV register interface.
> @@ -1205,6 +1206,7 @@ struct cdns3_aligned_buf {
> void *buf;
> dma_addr_t dma;
> u32 size;
> + enum dma_data_direction dir;
> unsigned in_use:1;
> struct list_head list;
> };
> @@ -1298,6 +1300,7 @@ struct cdns3_device {
>
> struct cdns3_usb_regs __iomem *regs;
>
> + struct dma_pool *eps_dma_pool;
> struct usb_ctrlrequest *setup_buf;
> dma_addr_t setup_dma;
> void *zlp_buf;
> --
> 1.7.1
>

I guess this issue may due to the size for DMA region is too small,
try to enlarge the it (eg, CMA size).

--

Thanks,
Peter Chen