[PATCH v8 24/25] timer_migration: Add tracepoints
From: Anna-Maria Behnsen
Date: Wed Oct 04 2023 - 08:36:37 EST
The timer pull logic needs proper debugging aids. Add tracepoints so the
hierarchical idle machinery can be diagnosed.
Signed-off-by: Anna-Maria Behnsen <anna-maria@xxxxxxxxxxxxx>
---
v8: Add wakeup value to tracepoints
---
include/trace/events/timer_migration.h | 283 +++++++++++++++++++++++++
kernel/time/timer_migration.c | 24 +++
2 files changed, 307 insertions(+)
create mode 100644 include/trace/events/timer_migration.h
diff --git a/include/trace/events/timer_migration.h b/include/trace/events/timer_migration.h
new file mode 100644
index 000000000000..b8232c1a1ebf
--- /dev/null
+++ b/include/trace/events/timer_migration.h
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM timer_migration
+
+#if !defined(_TRACE_TIMER_MIGRATION_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TIMER_MIGRATION_H
+
+#include <linux/tracepoint.h>
+
+/* Group events */
+TRACE_EVENT(tmigr_group_set,
+ /* Emitted by tmigr_get_group() right after the group is added to tmigr_level_list[]. */
+ TP_PROTO(struct tmigr_group *group),
+
+ TP_ARGS(group),
+
+ TP_STRUCT__entry(
+ __field( void *, group )
+ __field( unsigned int, lvl )
+ __field( unsigned int, numa_node )
+ ),
+
+ TP_fast_assign(
+ __entry->group = group;
+ __entry->lvl = group->level;
+ __entry->numa_node = group->numa_node;
+ ),
+
+ TP_printk("group=%p lvl=%d numa=%d",
+ __entry->group, __entry->lvl, __entry->numa_node)
+);
+
+TRACE_EVENT(tmigr_connect_child_parent,
+ /* Logs @child together with its parent's level, NUMA node and child count. */
+ TP_PROTO(struct tmigr_group *child),
+
+ TP_ARGS(child),
+
+ TP_STRUCT__entry(
+ __field( void *, child )
+ __field( void *, parent )
+ __field( unsigned int, lvl )
+ __field( unsigned int, numa_node )
+ __field( unsigned int, num_children )
+ __field( u32, childmask )
+ ),
+
+ TP_fast_assign(
+ __entry->child = child;
+ __entry->parent = child->parent;
+ __entry->lvl = child->parent->level;
+ __entry->numa_node = child->parent->numa_node;
+ __entry->num_children = child->parent->num_children;
+ __entry->childmask = child->childmask;
+ ),
+
+ TP_printk("group=%p childmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
+ __entry->child, __entry->childmask, __entry->parent,
+ __entry->lvl, __entry->numa_node, __entry->num_children)
+);
+
+TRACE_EVENT(tmigr_connect_cpu_parent,
+ /* Logs the CPU of @tmc together with the level, NUMA node and child count of tmc->tmgroup. */
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc),
+
+ TP_STRUCT__entry(
+ __field( void *, parent )
+ __field( unsigned int, cpu )
+ __field( unsigned int, lvl )
+ __field( unsigned int, numa_node )
+ __field( unsigned int, num_children )
+ __field( u32, childmask )
+ ),
+
+ TP_fast_assign(
+ __entry->parent = tmc->tmgroup;
+ __entry->cpu = tmc->cpuevt.cpu;
+ __entry->lvl = tmc->tmgroup->level;
+ __entry->numa_node = tmc->tmgroup->numa_node;
+ __entry->num_children = tmc->tmgroup->num_children;
+ __entry->childmask = tmc->childmask;
+ ),
+
+ TP_printk("cpu=%d childmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
+ __entry->cpu, __entry->childmask, __entry->parent,
+ __entry->lvl, __entry->numa_node, __entry->num_children)
+);
+
+DECLARE_EVENT_CLASS(tmigr_group_and_cpu,
+ /* Shared layout: group, its parent, level and NUMA node, the active/migrator bits of @state, and @childmask. */
+ TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),
+
+ TP_ARGS(group, state, childmask),
+
+ TP_STRUCT__entry(
+ __field( void *, group )
+ __field( void *, parent )
+ __field( unsigned int, lvl )
+ __field( unsigned int, numa_node )
+ __field( u8, active )
+ __field( u8, migrator )
+ __field( u32, childmask )
+ ),
+
+ TP_fast_assign(
+ __entry->group = group;
+ __entry->parent = group->parent;
+ __entry->lvl = group->level;
+ __entry->numa_node = group->numa_node;
+ __entry->active = state.active;
+ __entry->migrator = state.migrator;
+ __entry->childmask = childmask;
+ ),
+
+ TP_printk("group=%p lvl=%d numa=%d active=%0x migrator=%0x "
+ "parent=%p childmask=%0x",
+ __entry->group, __entry->lvl, __entry->numa_node,
+ __entry->active, __entry->migrator,
+ __entry->parent, __entry->childmask)
+);
+
+DEFINE_EVENT(tmigr_group_and_cpu, tmigr_group_set_cpu_inactive,
+ /* Emitted by tmigr_inactive_up() just before it returns, with the new group state. */
+ TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),
+
+ TP_ARGS(group, state, childmask)
+);
+
+DEFINE_EVENT(tmigr_group_and_cpu, tmigr_group_set_cpu_active,
+ /* Emitted by tmigr_active_up() just before it returns, with the new group state. */
+ TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),
+
+ TP_ARGS(group, state, childmask)
+);
+
+/* CPU events */
+DECLARE_EVENT_CLASS(tmigr_cpugroup,
+ /* Shared layout: CPU number, its parent group (tmc->tmgroup) and tmc->wakeup. */
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc),
+
+ TP_STRUCT__entry(
+ __field( void *, parent)
+ __field( unsigned int, cpu)
+ __field( u64, wakeup)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = tmc->cpuevt.cpu;
+ __entry->parent = tmc->tmgroup;
+ __entry->wakeup = tmc->wakeup;
+ ),
+
+ TP_printk("cpu=%d parent=%p wakeup=%llu", __entry->cpu, __entry->parent, __entry->wakeup)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_new_timer,
+ /* Emitted by tmigr_new_timer() after its tmc->remote early return. */
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_active,
+ /* Emitted by tmigr_cpu_activate() under tmc->lock, before __tmigr_cpu_activate(). */
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_online,
+ /* Emitted by tmigr_cpu_online() under tmc->lock, before __tmigr_cpu_activate(). */
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_offline,
+ /* Emitted by tmigr_cpu_offline() after __tmigr_cpu_deactivate(tmc, KTIME_MAX). */
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc)
+);
+
+DEFINE_EVENT(tmigr_cpugroup, tmigr_handle_remote_cpu,
+ /* Emitted by tmigr_handle_remote_cpu() before tmc->remote is set and tmc->wakeup is reset. */
+ TP_PROTO(struct tmigr_cpu *tmc),
+
+ TP_ARGS(tmc)
+);
+
+TRACE_EVENT(tmigr_cpu_idle,
+ /* Same data as the tmigr_cpugroup events plus the caller-supplied @nextevt. */
+ TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),
+
+ TP_ARGS(tmc, nextevt),
+
+ TP_STRUCT__entry(
+ __field( void *, parent)
+ __field( unsigned int, cpu)
+ __field( u64, nextevt)
+ __field( u64, wakeup)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = tmc->cpuevt.cpu;
+ __entry->parent = tmc->tmgroup;
+ __entry->nextevt = nextevt;
+ __entry->wakeup = tmc->wakeup;
+ ),
+
+ TP_printk("cpu=%d parent=%p nextevt=%llu wakeup=%llu",
+ __entry->cpu, __entry->parent, __entry->nextevt, __entry->wakeup)
+);
+
+TRACE_EVENT(tmigr_update_events,
+ /* Snapshot of a child/group pair: active bits, @nextevt, group->next_expiry and the child's queued event. */
+ TP_PROTO(struct tmigr_group *child, struct tmigr_group *group,
+ union tmigr_state childstate, union tmigr_state groupstate,
+ u64 nextevt),
+
+ TP_ARGS(child, group, childstate, groupstate, nextevt),
+
+ TP_STRUCT__entry(
+ __field( void *, child )
+ __field( void *, group )
+ __field( u64, nextevt )
+ __field( u64, group_next_expiry )
+ __field( unsigned int, group_lvl )
+ __field( u8, child_active )
+ __field( u8, group_active )
+ __field( unsigned int, child_evtcpu )
+ __field( u64, child_evt_expiry )
+ ),
+ /* @child may be NULL; child_evtcpu and child_evt_expiry are then recorded as 0. */
+ TP_fast_assign(
+ __entry->child = child;
+ __entry->group = group;
+ __entry->nextevt = nextevt;
+ __entry->group_next_expiry = group->next_expiry;
+ __entry->group_lvl = group->level;
+ __entry->child_active = childstate.active;
+ __entry->group_active = groupstate.active;
+ __entry->child_evtcpu = child ? child->groupevt.cpu : 0;
+ __entry->child_evt_expiry = child ? child->groupevt.nextevt.expires : 0;
+ ),
+
+ TP_printk("child=%p group=%p group_lvl=%d child_active=%0x group_active=%0x "
+ "nextevt=%llu next_expiry=%llu child_evt_expiry=%llu child_evtcpu=%d",
+ __entry->child, __entry->group, __entry->group_lvl, __entry->child_active,
+ __entry->group_active,
+ __entry->nextevt, __entry->group_next_expiry, __entry->child_evt_expiry,
+ __entry->child_evtcpu)
+);
+
+TRACE_EVENT(tmigr_handle_remote,
+ /* Emitted by tmigr_handle_remote_up() for @group; records the group address and its level. */
+ TP_PROTO(struct tmigr_group *group),
+
+ TP_ARGS(group),
+
+ TP_STRUCT__entry(
+ __field( void * , group )
+ __field( unsigned int , lvl )
+ ),
+
+ TP_fast_assign(
+ __entry->group = group;
+ __entry->lvl = group->level;
+ ),
+
+ TP_printk("group=%p lvl=%d",
+ __entry->group, __entry->lvl)
+);
+
+#endif /* _TRACE_TIMER_MIGRATION_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index 929b3b94b893..5ac6b4f04c55 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -14,6 +14,9 @@
#include "timer_migration.h"
#include "tick-internal.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/timer_migration.h>
+
/*
* The timer migration mechanism is built on a hierarchy of groups. The
* lowest level group contains CPUs, the next level groups of CPU groups
@@ -514,6 +517,8 @@ static bool tmigr_active_up(struct tmigr_group *group,
*/
group->groupevt.ignore = true;
+ trace_tmigr_group_set_cpu_active(group, newstate, childmask);
+
return walk_done;
}
@@ -545,6 +550,7 @@ void tmigr_cpu_activate(void)
raw_spin_lock(&tmc->lock);
tmc->idle = false;
+ trace_tmigr_cpu_active(tmc);
__tmigr_cpu_activate(tmc);
raw_spin_unlock(&tmc->lock);
}
@@ -688,6 +694,9 @@ static bool tmigr_update_events(struct tmigr_group *group,
data->nextexp = tmigr_next_groupevt_expires(group);
}
+ trace_tmigr_update_events(child, group, data->childstate,
+ data->groupstate, nextexp);
+
unlock:
raw_spin_unlock(&group->lock);
@@ -721,6 +730,8 @@ static u64 tmigr_new_timer(struct tmigr_cpu *tmc, u64 nextexp)
if (tmc->remote)
return KTIME_MAX;
+ trace_tmigr_cpu_new_timer(tmc);
+
tmc->cpuevt.ignore = false;
data.remote = false;
@@ -754,6 +765,8 @@ static u64 tmigr_handle_remote_cpu(unsigned int cpu, u64 now,
return next;
}
+ trace_tmigr_handle_remote_cpu(tmc);
+
tmc->remote = true;
WRITE_ONCE(tmc->wakeup, KTIME_MAX);
@@ -838,6 +851,7 @@ static bool tmigr_handle_remote_up(struct tmigr_group *group,
childmask = data->childmask;
+ trace_tmigr_handle_remote(group);
again:
/*
* Handle the group only if @childmask is the migrator or if the
@@ -1156,6 +1170,8 @@ static bool tmigr_inactive_up(struct tmigr_group *group,
}
}
+ trace_tmigr_group_set_cpu_inactive(group, newstate, childmask);
+
return walk_done;
}
@@ -1244,6 +1260,7 @@ u64 tmigr_cpu_deactivate(u64 nextexp)
*/
WRITE_ONCE(tmc->wakeup, ret);
+ trace_tmigr_cpu_idle(tmc, nextexp);
raw_spin_unlock(&tmc->lock);
return ret;
}
@@ -1352,6 +1369,7 @@ static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node,
/* Setup successful. Add it to the hierarchy */
list_add(&group->list, &tmigr_level_list[lvl]);
+ trace_tmigr_group_set(group);
return group;
}
@@ -1369,6 +1387,8 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
raw_spin_unlock(&parent->lock);
raw_spin_unlock_irq(&child->lock);
+ trace_tmigr_connect_child_parent(child);
+
/*
* To prevent inconsistent states, active children need to be active in
* the new parent as well. Inactive children are already marked inactive
@@ -1450,6 +1470,8 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node)
raw_spin_unlock_irq(&group->lock);
+ trace_tmigr_connect_cpu_parent(tmc);
+
/* There are no children that need to be connected */
continue;
} else {
@@ -1518,6 +1540,7 @@ static int tmigr_cpu_online(unsigned int cpu)
WRITE_ONCE(tmc->wakeup, KTIME_MAX);
}
raw_spin_lock_irq(&tmc->lock);
+ trace_tmigr_cpu_online(tmc);
__tmigr_cpu_activate(tmc);
tmc->online = true;
raw_spin_unlock_irq(&tmc->lock);
@@ -1537,6 +1560,7 @@ static int tmigr_cpu_offline(unsigned int cpu)
* offline; Therefore nextevt value is set to KTIME_MAX
*/
__tmigr_cpu_deactivate(tmc, KTIME_MAX);
+ trace_tmigr_cpu_offline(tmc);
raw_spin_unlock_irq(&tmc->lock);
return 0;
--
2.39.2