Re: [RFC PATCH V0 09/10] trace/kmmscand: Add tracing of scanning and migration

From: Steven Rostedt
Date: Thu Dec 05 2024 - 12:46:52 EST


On Sun, 1 Dec 2024 15:38:17 +0000
Raghavendra K T <raghavendra.kt@xxxxxxx> wrote:

> Add tracing support to track
> - start and end of scanning.
> - migration.
>
> CC: Steven Rostedt <rostedt@xxxxxxxxxxx>
> CC: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
> CC: linux-trace-kernel@xxxxxxxxxxxxxxx
>
> Signed-off-by: Raghavendra K T <raghavendra.kt@xxxxxxx>
> ---
> include/trace/events/kmem.h | 99 +++++++++++++++++++++++++++++++++++++
> mm/kmmscand.c | 12 ++++-
> 2 files changed, 110 insertions(+), 1 deletion(-)
>
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index b37eb0a7060f..80978d85300d 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -9,6 +9,105 @@
> #include <linux/tracepoint.h>
> #include <trace/events/mmflags.h>
>
> +TRACE_EVENT(kmem_mm_enter,
> +
> + TP_PROTO(struct task_struct *t,
> + struct mm_struct *mm),
> +
> + TP_ARGS(t, mm),
> +
> + TP_STRUCT__entry(
> + __array( char, comm, TASK_COMM_LEN )

Is there a reason to record "comm"? There's other ways to retrieve it than
to always write it to the ring buffer.

> + __field( struct mm_struct *, mm )
> + ),
> +
> + TP_fast_assign(
> + memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
> + __entry->mm = mm;
> + ),
> +
> + TP_printk("kmmscand: mm_enter comm =%s mm=%p", __entry->comm, __entry->mm)
> +);
> +
> +TRACE_EVENT(kmem_scan_mm_start,
> +
> + TP_PROTO(struct task_struct *t,
> + struct mm_struct *mm),
> +
> + TP_ARGS(t, mm),
> +
> + TP_STRUCT__entry(
> + __array( char, comm, TASK_COMM_LEN )
> + __field( struct mm_struct *, mm )
> + ),
> +
> + TP_fast_assign(
> + memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
> + __entry->mm = mm;
> + ),
> +
> + TP_printk("kmmscand: scan_mm_start comm =%s mm=%p", __entry->comm, __entry->mm)

No need to write the event name into the TP_printk(). That's redundant.

Also, the above two events are pretty much identical. Please use
DECLARE_EVENT_CLASS().

> +);
> +
> +TRACE_EVENT(kmem_scan_mm_end,
> +
> + TP_PROTO(struct task_struct *t,
> + struct mm_struct *mm,
> + unsigned long start,
> + unsigned long total,
> + unsigned long scan_size,
> + unsigned long scan_period),
> +
> + TP_ARGS(t, mm, start, total, scan_period, scan_size),
> +
> + TP_STRUCT__entry(
> + __array( char, comm, TASK_COMM_LEN )

Again, why comm?

> + __field( struct mm_struct *, mm )
> + __field( unsigned long, start )
> + __field( unsigned long, total )
> + __field( unsigned long, scan_period )
> + __field( unsigned long, scan_size )
> + ),
> +
> + TP_fast_assign(
> + memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
> + __entry->mm = mm;
> + __entry->start = start;
> + __entry->total = total;
> + __entry->scan_period = scan_period;
> + __entry->scan_size = scan_size;
> + ),
> +
> + TP_printk("kmmscand: scan_mm_end comm =%s mm=%p, start = %ld, total = %ld, scan_period = %ld, scan_size = %ld",
> + __entry->comm, __entry->mm, __entry->start,
> + __entry->total, __entry->scan_period, __entry->scan_size)
> +);
> +
> +TRACE_EVENT(kmem_scan_mm_migrate,
> +
> + TP_PROTO(struct task_struct *t,
> + struct mm_struct *mm,
> + int rc),
> +
> + TP_ARGS(t, mm, rc),
> +
> + TP_STRUCT__entry(
> + __array( char, comm, TASK_COMM_LEN )
> + __field( struct mm_struct *, mm )
> + __field( int, rc )
> + ),
> +
> + TP_fast_assign(
> + memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
> + __entry->mm = mm;
> + __entry->rc = rc;
> + ),
> +
> + TP_printk("kmmscand: scan_mm_migrate comm =%s mm=%p rc=%d", __entry->comm,
> + __entry->mm, __entry->rc)
> +);
> +
> +
> TRACE_EVENT(kmem_cache_alloc,
>
> TP_PROTO(unsigned long call_site,
> diff --git a/mm/kmmscand.c b/mm/kmmscand.c
> index 682c0523c0b4..70f588a210dd 100644
> --- a/mm/kmmscand.c
> +++ b/mm/kmmscand.c
> @@ -668,8 +668,10 @@ static void kmmscand_migrate_folio(void)
> WRITE_ONCE(kmmscand_cur_migrate_mm, info->mm);
> spin_unlock(&kmmscand_migrate_lock);
>
> - if (info->mm)
> + if (info->mm) {
> ret = kmmscand_promote_folio(info);
> + trace_kmem_scan_mm_migrate(info->mm->owner, info->mm, ret);
> + }
>
> /* TBD: encode migrated count here, currently assume folio_nr_pages */
> if (!ret)
> @@ -828,6 +830,9 @@ static unsigned long kmmscand_scan_mm_slot(void)
> goto outerloop;
> }
>
> + if (mm->owner)
> + trace_kmem_scan_mm_start(mm->owner, mm);
> +
> now = jiffies;
> /*
> * Dont scan if :
> @@ -868,6 +873,10 @@ static unsigned long kmmscand_scan_mm_slot(void)
>
> update_mmslot_info = true;
>
> + if (mm->owner)
> + trace_kmem_scan_mm_end(mm->owner, mm, address, total,
> + mm_slot_scan_period, mm_slot_scan_size);

Please do not add conditions that is used just for calling a tracepoint.
That takes away the "nop" of the function. You can either use
TRACE_EVENT_CONDITION() or DEFINE_EVENT_CONDITION(), or you can hard code
it here:

if (trace_kmem_scan_mm_end_enabled()) {
if (mm->owner)
trace_kmem_scan_mm_end(mm->owner, mm, address, total,
mm_slot_scan_period, mm_slot_scan_size);
}

But since it is a single condition, I would prefer the *_CONDITION() macros
above.

-- Steve

> +
> count_kmmscand_mm_scans();
>
> outerloop:
> @@ -1020,6 +1029,7 @@ void __kmmscand_enter(struct mm_struct *mm)
> spin_unlock(&kmmscand_mm_lock);
>
> mmgrab(mm);
> + trace_kmem_mm_enter(mm->owner, mm);
> if (wakeup)
> wake_up_interruptible(&kmmscand_wait);
> }