Re: xhci_hcd 0000:00:14.0: ERROR Transfer event TRB DMA ptr not part of current TD ep_index 1 comp_code 1

From: Michał Pecio
Date: Sun Apr 07 2024 - 08:26:00 EST


This (and the absence of any earlier errors on the endpoint) looks
like the hardware may be confirming a "successful" transfer twice or
the driver may be processing one such confirmation twice.

[ 94.088594] usb 1-2: USB disconnect, device number 8
[ 94.089370] xhci_hcd 0000:00:14.0: ERROR Transfer event TRB DMA ptr not part of current TD ep_index 1 comp_code 1
[ 94.089403] xhci_hcd 0000:00:14.0: Looking for event-dma 00000001250310f0 trb-start 0000000125031100 trb-end 0000000125031100 seg-start 0000000125031000 seg-end 0000000125031ff0
[ 94.089427] xhci_hcd 0000:00:14.0: last xhci_td_cleanup: first_dma 1250310f0 last_dma 1250310f0 status -115 from finish_td

(I say "successful" but it really isn't - the device is no longer
listening. But there is no delivery confirmation on isochronous OUT
endpoints so the xHC doesn't suspect anything.)

Could you try again with this updated debug patch to get more info?

Regards,
Michal
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 52278afea94b..21886ef303db 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -843,11 +843,16 @@ static void xhci_unmap_td_bounce_buffer(struct xhci_hcd *xhci,
seg->bounce_offs = 0;
}

-static int xhci_td_cleanup(struct xhci_hcd *xhci, struct xhci_td *td,
- struct xhci_ring *ep_ring, int status)
+static int __xhci_td_cleanup(struct xhci_hcd *xhci, struct xhci_td *td,
+ struct xhci_ring *ep_ring, int status, const char *from)
{
struct urb *urb = NULL;

+ ep_ring->cleanup_first_dma = xhci_trb_virt_to_dma(ep_ring->deq_seg, td->first_trb);
+ ep_ring->cleanup_last_dma = xhci_trb_virt_to_dma(ep_ring->deq_seg, td->last_trb);
+ ep_ring->cleanup_status = status;
+ ep_ring->cleanup_from = from;
+
/* Clean up the endpoint's TD list */
urb = td->urb;

@@ -891,6 +896,8 @@ static int xhci_td_cleanup(struct xhci_hcd *xhci, struct xhci_td *td,
return 0;
}

+#define xhci_td_cleanup(xhci, td, ep_ring, status) __xhci_td_cleanup(xhci, td, ep_ring, status, __func__)
+

/* Complete the cancelled URBs we unlinked from td_list. */
static void xhci_giveback_invalidated_tds(struct xhci_virt_ep *ep)
@@ -2179,6 +2186,15 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
u32 trb_comp_code)
{
struct xhci_ep_ctx *ep_ctx;
+ struct urb_priv *urb_priv;
+ struct usb_iso_packet_descriptor *frame;
+
+ urb_priv = td->urb->hcpriv;
+ frame = &td->urb->iso_frame_desc[urb_priv->num_tds_done];
+
+ if (ep_ring->interesting)
+ xhci_info(xhci, "finish_td td_status %d comp_code %d frame_status %d frame_actual_length %d\n",
+ td->status, trb_comp_code, frame->status, frame->actual_length);

ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep->ep_index);

@@ -2391,12 +2407,17 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
short_framestatus = td->urb->transfer_flags & URB_SHORT_NOT_OK ?
-EREMOTEIO : 0;

+ if (ep_ring->interesting)
+ xhci_info(xhci, "process_isoc_td event %px requested %u ep_trb_len %u remaining %u\n", event, requested, ep_trb_len, remaining);
+
/* handle completion code */
switch (trb_comp_code) {
case COMP_SUCCESS:
/* Don't overwrite status if TD had an error, see xHCI 4.9.1 */
- if (td->error_mid_td)
+ if (td->error_mid_td) {
+ xhci_info(xhci, "Got SUCCESS after mid TD error\n");
break;
+ }
if (remaining) {
frame->status = short_framestatus;
if (xhci->quirks & XHCI_TRUST_TX_LENGTH)
@@ -2462,7 +2483,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
finish_td:
/* Don't give back TD yet if we encountered an error mid TD */
if (td->error_mid_td && ep_trb != td->last_trb) {
- xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n");
+ xhci_info(xhci, "Error mid isoc TD, wait for final completion event\n");
td->urb_length_set = true;
return 0;
}
@@ -2597,6 +2618,11 @@ static int handle_tx_event(struct xhci_hcd *xhci,
trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
ep_trb_dma = le64_to_cpu(event->buffer);

+ bool interesting = trb_comp_code != COMP_SUCCESS && trb_comp_code != COMP_SHORT_PACKET;
+ if (interesting)
+ xhci_info(xhci, "handle_tx_event event %px ep_trb_dma %llx comp_code %d len %u slot %u ep %d\n",
+ event, ep_trb_dma, trb_comp_code, EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)), slot_id, ep_index);
+
ep = xhci_get_virt_ep(xhci, slot_id, ep_index);
if (!ep) {
xhci_err(xhci, "ERROR Invalid Transfer event\n");
@@ -2606,6 +2632,11 @@ static int handle_tx_event(struct xhci_hcd *xhci,
ep_ring = xhci_dma_to_transfer_ring(ep, ep_trb_dma);
ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep_index);

+ snprintf(&ep_ring->log_b[ep_ring->log_i++][0], 100,
+ "event %px ep_trb_dma %llx comp_code %d len %u slot %u ep %d",
+ event, ep_trb_dma, trb_comp_code, EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)), slot_id, ep_index);
+ ep_ring->log_i %= 5;
+
if (GET_EP_CTX_STATE(ep_ctx) == EP_STATE_DISABLED) {
xhci_err(xhci,
"ERROR Transfer event for disabled endpoint slot %u ep %u\n",
@@ -2640,6 +2671,11 @@ static int handle_tx_event(struct xhci_hcd *xhci,
}
}

+ if (!interesting && ep_ring->interesting)
+ xhci_info(xhci, "handle_tx_event event %px ep_trb_dma %llx comp_code %d len %u slot %u ep %d\n",
+ event, ep_trb_dma, trb_comp_code, EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)), slot_id, ep_index);
+ ep_ring->interesting |= interesting;
+
/* Count current td numbers if ep->skip is set */
if (ep->skip)
td_num += list_count_nodes(&ep_ring->td_list);
@@ -2831,6 +2867,12 @@ static int handle_tx_event(struct xhci_hcd *xhci,
ep_seg = trb_in_td(xhci, td->start_seg, td->first_trb,
td->last_trb, ep_trb_dma, false);

+ if (ep_ring->interesting)
+ xhci_info(xhci, "handle_tx_event event %px first_trb %llx last_trb %llx ep_seg %llx\n", event,
+ xhci_trb_virt_to_dma(ep_ring->deq_seg, td->first_trb),
+ xhci_trb_virt_to_dma(ep_ring->deq_seg, td->last_trb),
+ ep_seg? ep_seg->dma: -1);
+
/*
* Skip the Force Stopped Event. The event_trb(event_dma) of FSE
* is not in the current TD pointed by ep_ring->dequeue because
@@ -2880,7 +2922,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
td_next->last_trb, ep_trb_dma, false);
if (ep_seg) {
/* give back previous TD, start handling new */
- xhci_dbg(xhci, "Missing TD completion event after mid TD error\n");
+ xhci_info(xhci, "Missing TD completion event after mid TD error\n");
ep_ring->dequeue = td->last_trb;
ep_ring->deq_seg = td->last_trb_seg;
inc_deq(xhci, ep_ring);
@@ -2898,9 +2940,16 @@ static int handle_tx_event(struct xhci_hcd *xhci,
trb_comp_code);
trb_in_td(xhci, td->start_seg, td->first_trb,
td->last_trb, ep_trb_dma, true);
+ xhci_info(xhci, "last xhci_td_cleanup: first_dma %llx last_dma %llx status %d from %s\n",
+ ep_ring->cleanup_first_dma, ep_ring->cleanup_last_dma,
+ ep_ring->cleanup_status, ep_ring->cleanup_from);
+ for (int i = 0; i < 5; i++)
+ xhci_info(xhci, "handle_tx_event log %2d: %s\n", i-4, &ep_ring->log_b[(ep_ring->log_i + i) % 5][0]);
+ ep_ring->interesting = true;
return -ESHUTDOWN;
}
}
+
if (trb_comp_code == COMP_SHORT_PACKET)
ep_ring->last_td_was_short = true;
else
@@ -2958,6 +3007,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
*/
} while (handling_skipped_tds);

+ ep_ring->interesting = interesting;
+
return 0;

err_out:
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 6f4bf98a6282..7e2c83e3b9e9 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1360,7 +1360,16 @@ struct xhci_ring {
unsigned int bounce_buf_len;
enum xhci_ring_type type;
bool last_td_was_short;
+ bool interesting;
struct radix_tree_root *trb_address_map;
+
+ dma_addr_t cleanup_first_dma;
+ dma_addr_t cleanup_last_dma;
+ const char *cleanup_from;
+ int cleanup_status;
+
+ int log_i;
+ char log_b[5][100];
};

struct xhci_erst_entry {