Re: [PATCH] drm/xe: Add creator TID to sched job trace points

From: Matthew Brost

Date: Mon Jun 29 2026 - 11:36:45 EST


On Mon, Jun 29, 2026 at 02:15:27PM +0800, Chen, Junjia wrote:
> Capture the creating task pid when a scheduler job is allocated
> and emit it in the sched job trace events alongside the
> exec queue pointer.
>
> This makes it easier to correlate queue activity back to
> the userspace thread that submitted the work.
>
> Signed-off-by: Chen, Junjia <junjia.chen@xxxxxxxxx>
> ---
> drivers/gpu/drm/xe/xe_sched_job.c | 2 ++
> drivers/gpu/drm/xe/xe_sched_job_types.h | 3 +++
> drivers/gpu/drm/xe/xe_trace.h | 12 +++++++++---
> 3 files changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
> index b64e6a434807..1356755cd180 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job.c
> +++ b/drivers/gpu/drm/xe/xe_sched_job.c
> @@ -7,6 +7,7 @@
>
> #include <uapi/drm/xe_drm.h>
> #include <linux/dma-fence-chain.h>
> +#include <linux/sched.h>
> #include <linux/slab.h>
>
> #include "xe_device.h"
> @@ -111,6 +112,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
>
> job->q = q;
> job->sample_timestamp = U64_MAX;
> + job->creator_tid = task_pid_nr(current);
> kref_init(&job->refcount);
> xe_exec_queue_get(job->q);
>
> diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
> index 0490b1247a6e..f2ad94105b37 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job_types.h
> +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
> @@ -7,6 +7,7 @@
> #define _XE_SCHED_JOB_TYPES_H_
>
> #include <linux/kref.h>
> +#include <linux/types.h>
>
> #include <drm/gpu_scheduler.h>
>
> @@ -59,6 +60,8 @@ struct xe_sched_job {
> u32 lrc_seqno;
> /** @migrate_flush_flags: Additional flush flags for migration jobs */
> u32 migrate_flush_flags;
> + /** @creator_tid: task pid that created this job */
> + pid_t creator_tid;
> /** @sample_timestamp: Sampling of job timestamp in TDR */
> u64 sample_timestamp;
> /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
> diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
> index dfd87f03c4fd..a3115d6e4c6a 100644
> --- a/drivers/gpu/drm/xe/xe_trace.h
> +++ b/drivers/gpu/drm/xe/xe_trace.h
> @@ -258,8 +258,10 @@ DECLARE_EVENT_CLASS(xe_sched_job,
>
> TP_STRUCT__entry(
> __string(dev, __dev_name_eq(job->q))
> + __field(struct xe_exec_queue *, q)

I'm not sure how helpful the 'q' is here, as it can be inferred from the gt_id
and guc_id + correlated exec_queue trace points too. Also, the exec_queue
trace points don't print the queue pointer either.

> __field(u32, seqno)
> __field(u32, lrc_seqno)
> + __field(pid_t, creator_tid)
> __field(u8, gt_id)
> __field(u16, guc_id)
> __field(u32, guc_state)
> @@ -271,8 +273,10 @@ DECLARE_EVENT_CLASS(xe_sched_job,
>
> TP_fast_assign(
> __assign_str(dev);
> + __entry->q = job->q;
> __entry->seqno = xe_sched_job_seqno(job);
> __entry->lrc_seqno = xe_sched_job_lrc_seqno(job);
> + __entry->creator_tid = job->creator_tid;

Should we include the creator_tid in the exec_queue trace points too?

Generally the exec_queue and job tracepoints are used together.

Matt

> __entry->gt_id = job->q->gt->info.id;
> __entry->guc_id = job->q->guc->id;
> __entry->guc_state =
> @@ -283,9 +287,11 @@ DECLARE_EVENT_CLASS(xe_sched_job,
> __entry->batch_addr = (u64)job->ptrs[0].batch_addr;
> ),
>
> - TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, gt=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
> - __get_str(dev), __entry->fence, __entry->seqno,
> - __entry->lrc_seqno, __entry->gt_id, __entry->guc_id,
> + TP_printk("dev=%s, q=%p, fence=%p, seqno=%u, lrc_seqno=%u, creator_tid=%d, gt=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
> + __get_str(dev), __entry->q, __entry->fence,
> + __entry->seqno, __entry->lrc_seqno,
> + __entry->creator_tid,
> + __entry->gt_id, __entry->guc_id,
> __entry->batch_addr, __entry->guc_state,
> __entry->flags, __entry->error)
> );
> --
> 2.43.0
>