Re: [PATCH 4/4] drm/msm/a6xx: Capture gmu log in devcoredump

From: Akhil P Oommen
Date: Tue Nov 23 2021 - 10:38:59 EST


On 11/23/2021 12:36 AM, Rob Clark wrote:
On Mon, Nov 22, 2021 at 10:26 AM Rob Clark <robdclark@xxxxxxxxx> wrote:

On Thu, Nov 18, 2021 at 2:21 AM Akhil P Oommen <akhilpo@xxxxxxxxxxxxxx> wrote:

Capture gmu log in coredump to enhance debugging.

Signed-off-by: Akhil P Oommen <akhilpo@xxxxxxxxxxxxxx>
---

drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 41 +++++++++++++++++++++++++++++
drivers/gpu/drm/msm/adreno/adreno_gpu.c | 2 +-
drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 ++
3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 7501849..9fa3fa6 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -42,6 +42,8 @@ struct a6xx_gpu_state {
struct a6xx_gpu_state_obj *cx_debugbus;
int nr_cx_debugbus;

+ struct msm_gpu_state_bo *gmu_log;
+
struct list_head objs;
};

@@ -800,6 +802,30 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
&a6xx_state->gmu_registers[2], false);
}

+static void a6xx_get_gmu_log(struct msm_gpu *gpu,
+ struct a6xx_gpu_state *a6xx_state)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+ struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+ struct msm_gpu_state_bo *gmu_log;
+
+ gmu_log = state_kcalloc(a6xx_state,
+ 1, sizeof(*a6xx_state->gmu_log));
+ if (!gmu_log)
+ return;
+
+ gmu_log->iova = gmu->log.iova;
+ gmu_log->size = gmu->log.size;
+ gmu_log->data = kvzalloc(gmu_log->size, GFP_KERNEL);
+ if (!gmu_log->data)
+ return;
+
+ memcpy(gmu_log->data, gmu->log.virt, gmu->log.size);
+
+ a6xx_state->gmu_log = gmu_log;
+}
+
#define A6XX_GBIF_REGLIST_SIZE 1
static void a6xx_get_registers(struct msm_gpu *gpu,
struct a6xx_gpu_state *a6xx_state,
@@ -937,6 +963,8 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)

a6xx_get_gmu_registers(gpu, a6xx_state);

+ a6xx_get_gmu_log(gpu, a6xx_state);
+
/* If GX isn't on the rest of the data isn't going to be accessible */
if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
return &a6xx_state->base;
@@ -978,6 +1006,9 @@ static void a6xx_gpu_state_destroy(struct kref *kref)
struct a6xx_gpu_state *a6xx_state = container_of(state,
struct a6xx_gpu_state, base);

+ if (a6xx_state->gmu_log && a6xx_state->gmu_log->data)
+ kvfree(a6xx_state->gmu_log->data);
+
list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
kfree(obj);

@@ -1191,6 +1222,16 @@ void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,

adreno_show(gpu, state, p);

+ drm_puts(p, "gmu-log:\n");
+ if (a6xx_state->gmu_log) {
+ struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
+
+ drm_printf(p, " iova: 0x%016llx\n", gmu_log->iova);
+ drm_printf(p, " size: %d\n", gmu_log->size);

fwiw, that wants to be:

+ drm_printf(p, " size: %zu\n", gmu_log->size);

with that fixed, r-b

Hmm, actually, I seem to be getting an empty log.. is special gmu fw,
or non-fused device needed for this to work?

BR,
-R

No, there is no special fw. I tested this on 7c3 and it worked for me. a618/a630 has an old version of gmu firmware which is pretty different from the newer ones. Let me check.

-Akhil.


BR,
-R

+ adreno_show_object(p, &gmu_log->data, gmu_log->size,
+ &gmu_log->encoded);
+ }
+
drm_puts(p, "registers:\n");
for (i = 0; i < a6xx_state->nr_registers; i++) {
struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 7486652..7d1ff20 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -630,7 +630,7 @@ static char *adreno_gpu_ascii85_encode(u32 *src, size_t len)
}

/* len is expected to be in bytes */
-static void adreno_show_object(struct drm_printer *p, void **ptr, int len,
+void adreno_show_object(struct drm_printer *p, void **ptr, int len,
bool *encoded)
{
if (!*ptr || !len)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 225c277..6762308 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -306,6 +306,8 @@ void adreno_gpu_state_destroy(struct msm_gpu_state *state);

int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state);
int adreno_gpu_state_put(struct msm_gpu_state *state);
+void adreno_show_object(struct drm_printer *p, void **ptr, int len,
+ bool *encoded);

/*
* Common helper function to initialize the default address space for arm-smmu
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation.