[PATCH 5/7] drm/msm/gpu: Also snapshot GMU HFI buffer

From: Rob Clark
Date: Wed Nov 24 2021 - 16:40:06 EST


From: Rob Clark <robdclark@xxxxxxxxxxxx>

This also includes a history of start index of the last 8 messages on
each queue, since parsing backwards to decode recently sent HFI messages
is hard(ish).

Signed-off-by: Rob Clark <robdclark@xxxxxxxxxxxx>
---
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 48 ++++++++++++++++++++-
drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 10 +++++
drivers/gpu/drm/msm/adreno/a6xx_hfi.h | 11 +++++
3 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 1de103f29d25..a84ba8982cb8 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -43,6 +43,9 @@ struct a6xx_gpu_state {
int nr_cx_debugbus;

struct msm_gpu_state_bo *gmu_log;
+ struct msm_gpu_state_bo *gmu_hfi;
+
+ s32 hfi_queue_history[2][HFI_HISTORY_SZ];

struct list_head objs;
};
@@ -822,6 +825,25 @@ static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
return snapshot;
}

+static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
+ struct a6xx_gpu_state *a6xx_state)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+ struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+ unsigned i, j;
+
+ BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
+
+ for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
+ struct a6xx_hfi_queue *queue = &gmu->queues[i];
+ for (j = 0; j < HFI_HISTORY_SZ; j++) {
+ unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
+ a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
+ }
+ }
+}
+
#define A6XX_GBIF_REGLIST_SIZE 1
static void a6xx_get_registers(struct msm_gpu *gpu,
struct a6xx_gpu_state *a6xx_state,
@@ -960,6 +982,9 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
a6xx_get_gmu_registers(gpu, a6xx_state);

a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
+ a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
+
+ a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);

/* If GX isn't on the rest of the data isn't going to be accessible */
if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
@@ -1005,6 +1030,9 @@ static void a6xx_gpu_state_destroy(struct kref *kref)
if (a6xx_state->gmu_log)
kvfree(a6xx_state->gmu_log->data);

+ if (a6xx_state->gmu_hfi)
+ kvfree(a6xx_state->gmu_hfi->data);
+
list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
kfree(obj);

@@ -1223,11 +1251,29 @@ void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;

drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova);
- drm_printf(p, " size: %d\n", gmu_log->size);
+ drm_printf(p, " size: %zu\n", gmu_log->size);
adreno_show_object(p, &gmu_log->data, gmu_log->size,
&gmu_log->encoded);
}

+ drm_puts(p, "gmu-hfi:\n");
+ if (a6xx_state->gmu_hfi) {
+ struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
+ unsigned i, j;
+
+ drm_printf(p, " iova: 0x%016llx\n", gmu_hfi->iova);
+ drm_printf(p, " size: %zu\n", gmu_hfi->size);
+ for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
+ drm_printf(p, " queue-history[%u]:", i);
+ for (j = 0; j < HFI_HISTORY_SZ; j++) {
+ drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
+ }
+ drm_printf(p, "\n");
+ }
+ adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
+ &gmu_hfi->encoded);
+ }
+
drm_puts(p, "registers:\n");
for (i = 0; i < a6xx_state->nr_registers; i++) {
struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
index d4c65bf0a1b7..d73fce5fdf1f 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
@@ -36,6 +36,8 @@ static int a6xx_hfi_queue_read(struct a6xx_gmu *gmu,

hdr = queue->data[index];

+ queue->history[(queue->history_idx++) % HFI_HISTORY_SZ] = index;
+
/*
* If we are to assume that the GMU firmware is in fact a rational actor
* and is programmed to not send us a larger response than we expect
@@ -75,6 +77,8 @@ static int a6xx_hfi_queue_write(struct a6xx_gmu *gmu,
return -ENOSPC;
}

+ queue->history[(queue->history_idx++) % HFI_HISTORY_SZ] = index;
+
for (i = 0; i < dwords; i++) {
queue->data[index] = data[i];
index = (index + 1) % header->size;
@@ -600,6 +604,9 @@ void a6xx_hfi_stop(struct a6xx_gmu *gmu)

queue->header->read_index = 0;
queue->header->write_index = 0;
+
+ memset(&queue->history, 0xff, sizeof(queue->history));
+ queue->history_idx = 0;
}
}

@@ -612,6 +619,9 @@ static void a6xx_hfi_queue_init(struct a6xx_hfi_queue *queue,
queue->data = virt;
atomic_set(&queue->seqnum, 0);

+ memset(&queue->history, 0xff, sizeof(queue->history));
+ queue->history_idx = 0;
+
/* Set up the shared memory header */
header->iova = iova;
header->type = 10 << 8 | id;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.h b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h
index 2bd670ca42d6..528110169398 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.h
@@ -33,6 +33,17 @@ struct a6xx_hfi_queue {
spinlock_t lock;
u32 *data;
atomic_t seqnum;
+
+ /*
+ * Tracking for the start index of the last N messages in the
+ * queue, for the benefit of devcore dump / crashdec (since
+ * parsing in the reverse direction to decode the last N
+ * messages is difficult to do and would rely on heuristics
+ * which are not guaranteed to be correct)
+ */
+#define HFI_HISTORY_SZ 8
+ s32 history[HFI_HISTORY_SZ];
+ u8 history_idx;
};

/* This is the outgoing queue to the GMU */
--
2.33.1