[PATCH 3/6] drm/msm: split rd debugfs file

From: Rob Clark
Date: Tue Oct 24 2017 - 09:23:29 EST


Split into two instances, the existing $debugfs/rd which continues to
dump all submits, and $debugfs/hangrd which will be used to dump just
submits that cause gpu hangs (and eventually faults, but that will
require some iommu framework enhancements).

Signed-off-by: Rob Clark <robdclark@xxxxxxxxx>
---
drivers/gpu/drm/msm/msm_drv.h | 5 ++-
drivers/gpu/drm/msm/msm_gpu.c | 2 +-
drivers/gpu/drm/msm/msm_rd.c | 101 +++++++++++++++++++++++++++++++-----------
3 files changed, 79 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 666fce66f9dd..b01bd7fa9c2b 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -108,7 +108,8 @@ struct msm_drm_private {

struct drm_fb_helper *fbdev;

- struct msm_rd_state *rd;
+ struct msm_rd_state *rd; /* debugfs to dump all submits */
+ struct msm_rd_state *hangrd; /* debugfs to dump hanging submits */
struct msm_perf_state *perf;

/* list of GEM objects: */
@@ -298,7 +299,7 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m);
int msm_debugfs_late_init(struct drm_device *dev);
int msm_rd_debugfs_init(struct drm_minor *minor);
void msm_rd_debugfs_cleanup(struct msm_drm_private *priv);
-void msm_rd_dump_submit(struct msm_gem_submit *submit);
+void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit);
int msm_perf_debugfs_init(struct drm_minor *minor);
void msm_perf_debugfs_cleanup(struct msm_drm_private *priv);
#else
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index d26a7282466e..5baeb34f098e 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -556,7 +556,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,

list_add_tail(&submit->node, &ring->submits);

- msm_rd_dump_submit(submit);
+ msm_rd_dump_submit(priv->rd, submit);

update_sw_cntrs(gpu);

diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 981bb17c11a7..d689407c809a 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -19,11 +19,17 @@
*
* tail -f /sys/kernel/debug/dri/<minor>/rd > logfile.rd
*
- * To log the cmdstream in a format that is understood by freedreno/cffdump
+ * to log the cmdstream in a format that is understood by freedreno/cffdump
* utility. By comparing the last successfully completed fence #, to the
* cmdstream for the next fence, you can narrow down which process and submit
* caused the gpu crash/lockup.
*
+ * Additionally:
+ *
+ * tail -f /sys/kernel/debug/dri/<minor>/hangrd > logfile.rd
+ *
+ * will capture just the cmdstream from submits which triggered a GPU hang.
+ *
* This bypasses drm_debugfs_create_files() mainly because we need to use
* our own fops for a bit more control. In particular, we don't want to
* do anything if userspace doesn't have the debugfs file open.
@@ -220,53 +226,89 @@ static const struct file_operations rd_debugfs_fops = {
.release = rd_release,
};

-int msm_rd_debugfs_init(struct drm_minor *minor)
+
+static void rd_cleanup(struct msm_rd_state *rd)
+{
+ if (!rd)
+ return;
+
+ mutex_destroy(&rd->read_lock);
+ kfree(rd);
+}
+
+static struct msm_rd_state *rd_init(struct drm_minor *minor, const char *name)
{
- struct msm_drm_private *priv = minor->dev->dev_private;
struct msm_rd_state *rd;
struct dentry *ent;
-
- /* only create on first minor: */
- if (priv->rd)
- return 0;
+ int ret = 0;

rd = kzalloc(sizeof(*rd), GFP_KERNEL);
if (!rd)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);

rd->dev = minor->dev;
rd->fifo.buf = rd->buf;

mutex_init(&rd->read_lock);
- priv->rd = rd;

init_waitqueue_head(&rd->fifo_event);

- ent = debugfs_create_file("rd", S_IFREG | S_IRUGO,
+ ent = debugfs_create_file(name, S_IFREG | S_IRUGO,
minor->debugfs_root, rd, &rd_debugfs_fops);
if (!ent) {
- DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/rd\n",
- minor->debugfs_root);
+ DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/%s\n",
+ minor->debugfs_root, name);
+ ret = -ENOMEM;
goto fail;
}

+ return rd;
+
+fail:
+ rd_cleanup(rd);
+ return ERR_PTR(ret);
+}
+
+int msm_rd_debugfs_init(struct drm_minor *minor)
+{
+ struct msm_drm_private *priv = minor->dev->dev_private;
+ struct msm_rd_state *rd;
+ int ret;
+
+ /* only create on first minor: */
+ if (priv->rd)
+ return 0;
+
+ rd = rd_init(minor, "rd");
+ if (IS_ERR(rd)) {
+ ret = PTR_ERR(rd);
+ goto fail;
+ }
+
+ priv->rd = rd;
+
+ rd = rd_init(minor, "hangrd");
+ if (IS_ERR(rd)) {
+ ret = PTR_ERR(rd);
+ goto fail;
+ }
+
+ priv->hangrd = rd;
+
return 0;

fail:
msm_rd_debugfs_cleanup(priv);
- return -1;
+ return ret;
}

void msm_rd_debugfs_cleanup(struct msm_drm_private *priv)
{
- struct msm_rd_state *rd = priv->rd;
-
- if (!rd)
- return;
-
+ rd_cleanup(priv->rd);
priv->rd = NULL;
- mutex_destroy(&rd->read_lock);
- kfree(rd);
+
+ rd_cleanup(priv->hangrd);
+ priv->hangrd = NULL;
}

static void snapshot_buf(struct msm_rd_state *rd,
@@ -304,11 +346,10 @@ static void snapshot_buf(struct msm_rd_state *rd,
}

/* called under struct_mutex */
-void msm_rd_dump_submit(struct msm_gem_submit *submit)
+void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit)
{
struct drm_device *dev = submit->dev;
- struct msm_drm_private *priv = dev->dev_private;
- struct msm_rd_state *rd = priv->rd;
+ struct task_struct *task;
char msg[128];
int i, n;

@@ -320,9 +361,17 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit)
*/
WARN_ON(!mutex_is_locked(&dev->struct_mutex));

- n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u",
- TASK_COMM_LEN, current->comm, task_pid_nr(current),
- submit->fence->seqno);
+ rcu_read_lock();
+ task = pid_task(submit->pid, PIDTYPE_PID);
+ if (task) {
+ n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u",
+ TASK_COMM_LEN, task->comm,
+ pid_nr(submit->pid), submit->seqno);
+ } else {
+ n = snprintf(msg, sizeof(msg), "???/%d: fence=%u",
+ pid_nr(submit->pid), submit->seqno);
+ }
+ rcu_read_unlock();

rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4));

--
2.13.6