[RFT PATCH 1/4] usb: dwc2: Simplify and fix DMA alignment code

From: Guenter Roeck
Date: Wed Feb 26 2020 - 16:04:28 EST


The code to align buffers for DMA was first introduced with commit
3bc04e28a030 ("usb: dwc2: host: Get aligned DMA in a more supported way").
It was updated with commit 56406e017a88 ("usb: dwc2: Fix DMA alignment
to start at allocated boundary") because it did not really align buffers to
DMA boundaries but to word offsets. This was then optimized in commit
1e111e885238 ("usb: dwc2: Fix inefficient copy of unaligned buffers")
to only copy actual data rather than the whole buffer. Commit 4a4863bf2e79
("usb: dwc2: Fix DMA cache alignment issues") changed this further to add
a padding at the end of the buffer to ensure that the old data pointer is
not in the same cache line as the buffer.

This last commit states "Otherwise, the stored_xfer_buffer gets corrupted
for IN URBs on non-cache-coherent systems". However, such corruptions are
still observed. This suggests that the commit may have been hiding a
problem rather than fixing it. Further analysis shows that this is indeed
the case: The code in dwc2_hc_start_transfer() assumes that the transfer
size is a multiple of wMaxPacketSize, and rounds up the transfer size
communicated to the chip accordingly. Added debug code confirms that
the chip does under some circumstances indeed send more data than requested
in the urb receive buffer size.

On top of that, it turns out that buffers are still not guaranteed to be
aligned to dma_get_cache_alignment(), but to DWC2_USB_DMA_ALIGN (4).
Further debugging shows that packets aligned to DWC2_USB_DMA_ALIGN
but not to dma_get_cache_alignment() are indeed common and work just fine.
This suggests that commit 56406e017a88 was not really necessary because
even without it packets were already aligned to DWC2_USB_DMA_ALIGN.

To simplify the code, move the old data pointer back to the beginning of
the new buffer, restoring most of the original commit. Stop aligning the
buffer to dma_get_cache_alignment() since it isn't needed and only makes
the code more complex. Instead, ensure that the allocated buffer is a
multiple of wMaxPacketSize to ensure that the chip does not write beyond
the end of the buffer.

Cc: Douglas Anderson <dianders@xxxxxxxxxxxx>
Cc: Boris Arzur <boris@xxxxxxxxx>
Fixes: 56406e017a88 ("usb: dwc2: Fix DMA alignment to start at allocated boundary")
Signed-off-by: Guenter Roeck <linux@xxxxxxxxxxxx>
---
drivers/usb/dwc2/hcd.c | 67 ++++++++++++++++++++++--------------------
1 file changed, 35 insertions(+), 32 deletions(-)

diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index b90f858af960..f6d8cc9cee34 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -2471,19 +2471,21 @@ static int dwc2_alloc_split_dma_aligned_buf(struct dwc2_hsotg *hsotg,

#define DWC2_USB_DMA_ALIGN 4

+struct dma_aligned_buffer {
+ void *old_xfer_buffer;
+ u8 data[0];
+};
+
static void dwc2_free_dma_aligned_buffer(struct urb *urb)
{
- void *stored_xfer_buffer;
+ struct dma_aligned_buffer *dma;
size_t length;

if (!(urb->transfer_flags & URB_ALIGNED_TEMP_BUFFER))
return;

- /* Restore urb->transfer_buffer from the end of the allocated area */
- memcpy(&stored_xfer_buffer,
- PTR_ALIGN(urb->transfer_buffer + urb->transfer_buffer_length,
- dma_get_cache_alignment()),
- sizeof(urb->transfer_buffer));
+ dma = container_of(urb->transfer_buffer,
+ struct dma_aligned_buffer, data);

if (usb_urb_dir_in(urb)) {
if (usb_pipeisoc(urb->pipe))
@@ -2491,49 +2493,50 @@ static void dwc2_free_dma_aligned_buffer(struct urb *urb)
else
length = urb->actual_length;

- memcpy(stored_xfer_buffer, urb->transfer_buffer, length);
+ memcpy(dma->old_xfer_buffer, dma->data, length);
}
- kfree(urb->transfer_buffer);
- urb->transfer_buffer = stored_xfer_buffer;
+ urb->transfer_buffer = dma->old_xfer_buffer;
+ kfree(dma);

urb->transfer_flags &= ~URB_ALIGNED_TEMP_BUFFER;
}

static int dwc2_alloc_dma_aligned_buffer(struct urb *urb, gfp_t mem_flags)
{
- void *kmalloc_ptr;
+ struct dma_aligned_buffer *dma;
size_t kmalloc_size;

- if (urb->num_sgs || urb->sg ||
- urb->transfer_buffer_length == 0 ||
+ if (urb->num_sgs || urb->sg || urb->transfer_buffer_length == 0 ||
+ (urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) ||
!((uintptr_t)urb->transfer_buffer & (DWC2_USB_DMA_ALIGN - 1)))
return 0;

- /*
- * Allocate a buffer with enough padding for original transfer_buffer
- * pointer. This allocation is guaranteed to be aligned properly for
- * DMA
- */
- kmalloc_size = urb->transfer_buffer_length +
- (dma_get_cache_alignment() - 1) +
- sizeof(urb->transfer_buffer);
+ kmalloc_size = sizeof(struct dma_aligned_buffer);
+ if (usb_urb_dir_out(urb)) {
+ kmalloc_size += urb->transfer_buffer_length;
+ } else {
+ struct usb_host_endpoint *ep = urb->ep;
+ int maxp = usb_endpoint_maxp(&ep->desc);

- kmalloc_ptr = kmalloc(kmalloc_size, mem_flags);
- if (!kmalloc_ptr)
- return -ENOMEM;
+ /*
+ * Input transfer buffer size must be a multiple of the
+ * endpoint's maximum packet size to match the transfer
+ * limit programmed into the chip.
+ * See calculation of chan->xfer_len in
+ * dwc2_hc_start_transfer().
+ */
+ kmalloc_size += roundup(urb->transfer_buffer_length, maxp);
+ }

- /*
- * Position value of original urb->transfer_buffer pointer to the end
- * of allocation for later referencing
- */
- memcpy(PTR_ALIGN(kmalloc_ptr + urb->transfer_buffer_length,
- dma_get_cache_alignment()),
- &urb->transfer_buffer, sizeof(urb->transfer_buffer));
+ dma = kmalloc(kmalloc_size, mem_flags);
+ if (!dma)
+ return -ENOMEM;

+ dma->old_xfer_buffer = urb->transfer_buffer;
if (usb_urb_dir_out(urb))
- memcpy(kmalloc_ptr, urb->transfer_buffer,
+ memcpy(dma->data, urb->transfer_buffer,
urb->transfer_buffer_length);
- urb->transfer_buffer = kmalloc_ptr;
+ urb->transfer_buffer = dma->data;

urb->transfer_flags |= URB_ALIGNED_TEMP_BUFFER;

--
2.17.1