[RFC PATCH] ftrace: support boot level tracing
From: Abderrahmane Benbachir
Date: Tue Mar 20 2018 - 14:32:29 EST
Hi Steve.
Initcall's tracing support have been recently added to ftrace, which enables
detecting latencies within each initcall function. The main concern here is,
enabling initcall's tracing (which is fine-grained) will add a lots of overhead
during boot-up. So with boot level tracing we can get the overall picture, with
low overhead, then we can use function tracing with filters to dig into more
specific path.
This is an example of boot level sequence, with the first initcall and the last
initcall that were retrieved dynamically at runtime:
0,console, first=con_init, last=univ8250_console_init
1,security, first=selinux_init, last=integrity_iintcache_init
2,early, first=trace_init_flags_sys_exit, last=initialize_ptr_random
3,pure, first=ipc_ns_init, last=net_ns_init
4,core, first=xen_pvh_gnttab_setup, last=__gnttab_init
5,postcore, first=irq_sysfs_init, last=kobject_uevent_init
6,arch, first=bts_init, last=pci_arch_init
7,subsys, first=init_vdso, last=watchdog_init
8,fs, first=nmi_warning_debugfs, last=acpi_reserve_resources
9,rootfs, first=populate_rootfs, last=ir_dev_scope_init
10,device, first=ia32_binfmt_init, last=mcheck_init_device
11,late, first=tboot_late_init, last=regulator_init_complete
To make it properly work, the idea is to register at boot time function based
events on the first and the last initcall of each boot level.
The created events will be recycled when the last sequence is reached.
This patch is only a proof of concept, I didn't use function based events
mechanism, I only changed the first and last initcalls to point to some
handlers, which worked fine, but need to changed:
+ (*first_initcall_fn) = trace_bootlevel_start_handler;
+ (*last_initcall_fn) = trace_bootlevel_end_handler;
I made this patch to get some feedbacks, and to know if this could be an
interresting feature to have in ftrace ?
Trace example when enabling boot level tracing along with function tracing:
[ 0.367507] : trace_bootlevel_start_handler: Start console boot level <----
[ 0.368240] : con_init <-console_init
...
[ 0.371869] : xen_cons_init <-console_init
[ 0.372409] : univ8250_console_init <-trace_bootlevel_end_handler
[ 0.373116] : trace_bootlevel_end_handler: End console boot level <----
...
[ 0.385005] : security_init <-start_kernel
[ 0.385550] : trace_bootlevel_start_handler: Start security boot level <----
[ 0.386291] : selinux_init <-security_init
[ 0.386831] : smack_init <-security_init
[ 0.387358] : tomoyo_init <-security_init
[ 0.387893] : apparmor_init <-security_init
[ 0.390694] : integrity_iintcache_init <-trace_bootlevel_end_handler
[ 0.391418] : trace_bootlevel_end_handler: End security boot level <----
...
[ 0.455924] : trace_bootlevel_start_handler: Start early boot level <----
[ 0.456659] : intel_pmu_init <-init_hw_perf_events
[ 0.457299] : p6_pmu_init <-intel_pmu_init
[ 0.457851] : numachip_system_init <-do_one_initcall
...
Signed-off-by: Abderrahmane Benbachir <abderrahmane.benbachir@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
kernel/trace/Makefile | 1 +
kernel/trace/trace_bootlevel.c | 137 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 138 insertions(+)
create mode 100644 kernel/trace/trace_bootlevel.c
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index e2538c7638d4..46fbf0ab81d9 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o
obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_TRACING) += trace_bootlevel.o
obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_seq.o
obj-$(CONFIG_TRACING) += trace_stat.o
diff --git a/kernel/trace/trace_bootlevel.c b/kernel/trace/trace_bootlevel.c
new file mode 100644
index 000000000000..fa83b5ca0749
--- /dev/null
+++ b/kernel/trace/trace_bootlevel.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Tracing Boot levels : console, security, early, ... etc.
+ *
+ * Copyright (c) 2018-2019 Abderrahmane Benbachir <abderrahmane.benbachir@xxxxxxxxxx>
+ */
+#include <linux/init.h>
+#include <asm/setup.h>
+#include "trace.h"
+
+#define MAX_LEVELS_SIZE 12
+
+struct bootlevel {
+ int level;
+ initcall_t first_fn;
+ initcall_t last_fn;
+};
+
+static int current_level __initdata;
+static struct bootlevel bootlevels[MAX_LEVELS_SIZE] __initdata;
+
+/*
+ * Keep these in sync with initcalls in include/linux/init.h
+ */
+extern initcall_t __con_initcall_start[], __con_initcall_end[];
+extern initcall_t __security_initcall_start[], __security_initcall_end[];
+extern initcall_t __initcallrootfs_start[];
+extern initcall_t __initcall_start[];
+extern initcall_t __initcall0_start[];
+extern initcall_t __initcall1_start[];
+extern initcall_t __initcall2_start[];
+extern initcall_t __initcall3_start[];
+extern initcall_t __initcall4_start[];
+extern initcall_t __initcall5_start[];
+extern initcall_t __initcall6_start[];
+extern initcall_t __initcall7_start[];
+extern initcall_t __initcall_end[];
+
+static initcall_t *initcall_levels[] __initdata = {
+ __initcall_start, /* early */
+ __initcall0_start, /* pure */
+ __initcall1_start, /* core */
+ __initcall2_start, /* postcore */
+ __initcall3_start, /* arch */
+ __initcall4_start, /* subsys */
+ __initcall5_start, /* fs */
+ __initcallrootfs_start, /* rootfs */
+ __initcall6_start, /* device */
+ __initcall7_start, /* late */
+ __initcall_end,
+};
+/*
+ * This is the runtime order of boot levels
+ */
+static char *bootlevel_names[] __initdata = {
+ "console",
+ "security",
+ "early",
+ "pure",
+ "core",
+ "postcore",
+ "arch",
+ "subsys",
+ "fs",
+ "rootfs",
+ "device",
+ "late"
+};
+
+static __init int notrace trace_bootlevel_start_handler(void)
+{
+ initcall_t initcall;
+
+ if (current_level < 0 || current_level > MAX_LEVELS_SIZE)
+ return 1;
+
+ initcall = bootlevels[current_level].first_fn;
+
+ trace_printk("Start %s boot level\n", bootlevel_names[current_level]);
+
+ return (initcall) ();
+}
+
+static __init int notrace trace_bootlevel_end_handler(void)
+{
+ int ret;
+ initcall_t initcall;
+
+ if (current_level < 0 || current_level > MAX_LEVELS_SIZE)
+ return 1;
+
+ initcall = bootlevels[current_level].last_fn;
+ ret = (initcall) ();
+
+ trace_printk("End %s boot level\n", bootlevel_names[current_level]);
+
+ current_level++;
+
+ return ret;
+}
+
+static void __init set_bootlevel_probe(int level, initcall_t *first_initcall_fn,
+ initcall_t *last_initcall_fn)
+{
+ if (level < 0 || level > MAX_LEVELS_SIZE)
+ return;
+
+ bootlevels[level].level = level;
+ bootlevels[level].first_fn = *first_initcall_fn;
+ bootlevels[level].last_fn = *last_initcall_fn;
+
+ /* TODO: this mechanism should be replaced by function event probe */
+ (*first_initcall_fn) = trace_bootlevel_start_handler;
+ (*last_initcall_fn) = trace_bootlevel_end_handler;
+}
+
+static int __init set_trace_bootlevel(char *str)
+{
+ int i, level;
+
+ level = 0;
+ current_level = 0;
+
+ set_bootlevel_probe(level++, __con_initcall_start,
+ __con_initcall_end-1);
+ set_bootlevel_probe(level++, __security_initcall_start,
+ __security_initcall_end-1);
+
+ for (i = 0; i < ARRAY_SIZE(initcall_levels) - 1; i++) {
+ set_bootlevel_probe(level, initcall_levels[i], initcall_levels[i+1]-1);
+ level++;
+ }
+
+ return 1;
+}
+__setup("trace_bootlevel", set_trace_bootlevel);
--
2.11.0