In Linux printk() can output timestamps next to every line. This is very
useful for tracking regressions, and finding places that can be optimized.
However, the timestamps are available only later in boot. On smaller
machines this is an insignificant amount of time, but on larger ones it can
be many seconds or even minutes into the boot process.
This patch adds an interface for platforms with unstable sched clock to
show timestamps early in boot. In order to get this functionality a
platform must do:
- Implement u64 sched_clock_early()
Clock that returns monotonic time
- Call sched_clock_early_init()
Tells sched clock that the early clock can be used
- Call sched_clock_early_fini()
Tells sched clock that the early clock is finished, and sched clock
should hand over the operation to the permanent clock.
- Use weak sched_clock_early() interface to determine time from boot in
arch specific read_boot_clock64()
Signed-off-by: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
---
arch/x86/kernel/time.c | 22 ++++++++++++++++
include/linux/sched/clock.h | 4 +++
kernel/sched/clock.c | 61 ++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 86 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index e0754cdbad37..6ede0da7041a 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -14,6 +14,7 @@
#include <linux/i8253.h>
#include <linux/time.h>
#include <linux/export.h>
+#include <linux/sched/clock.h>
#include <asm/vsyscall.h>
#include <asm/x86_init.h>
@@ -85,6 +86,7 @@ static __init void x86_late_time_init(void)
{
x86_init.timers.timer_init();
tsc_init();
+ tsc_early_fini();
}
/*
@@ -95,3 +97,23 @@ void __init time_init(void)
{
late_time_init = x86_late_time_init;
}
+
+/*
+ * Called once during boot to initialize boot time.
+ */
+void read_boot_clock64(struct timespec64 *ts)
+{
+ u64 ns_boot = sched_clock_early(); /* nsec from boot */
+ struct timespec64 ts_now;
+ bool valid_clock;
+
+ /* Time from epoch */
+ read_persistent_clock64(&ts_now);
+ valid_clock = ns_boot && timespec64_valid_strict(&ts_now) &&
+ (ts_now.tv_sec || ts_now.tv_nsec);
+
+ if (!valid_clock)
+ *ts = (struct timespec64){0, 0};
+ else
+ *ts = ns_to_timespec64(timespec64_to_ns(&ts_now) - ns_boot);
+}
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index a55600ffdf4b..f8291fa28c0c 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -63,6 +63,10 @@ extern void sched_clock_tick_stable(void);
extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(void);
+void sched_clock_early_init(void);
+void sched_clock_early_fini(void);
+u64 sched_clock_early(void);
+
/*
* As outlined in clock.c, provides a fast, high resolution, nanosecond
* time source that is monotonic per cpu argument and has bounded drift
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index ca0f8fc945c6..be5b60af4ca9 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -80,9 +80,24 @@ EXPORT_SYMBOL_GPL(sched_clock);
__read_mostly int sched_clock_running;
+/*
+ * We start with sched clock early static branch enabled, and global status
+ * disabled. Early in boot it is decided whether to enable the global
+ * status as well (set sched_clock_early_running to true), and later, when
+ * early clock is no longer needed, the static branch is disabled.
+ */
+static DEFINE_STATIC_KEY_TRUE(__use_sched_clock_early);
+static bool __read_mostly sched_clock_early_running;
+
void sched_clock_init(void)
{
- sched_clock_running = 1;
+ /*
+ * We start the clock only once the early clock is finished, or if the
+ * early clock was not running.
+ */
+ if (!sched_clock_early_running)
+ sched_clock_running = 1;
+
}
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
@@ -362,6 +377,11 @@ u64 sched_clock_cpu(int cpu)
if (sched_clock_stable())
return sched_clock() + __sched_clock_offset;
+ if (static_branch_unlikely(&__use_sched_clock_early)) {
+ if (sched_clock_early_running)
+ return sched_clock_early();
+ }
+
if (unlikely(!sched_clock_running))
return 0ull;
@@ -444,6 +464,45 @@ void sched_clock_idle_wakeup_event(void)
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
+u64 __weak sched_clock_early(void)
+{
+ return 0;
+}
+
+/*
+ * Called when sched_clock_early() is about to be finished; notifies sched
+ * clock that after this call sched_clock_early() can't be used.
+ */
+void __init sched_clock_early_fini(void)
+{
+ struct sched_clock_data *scd = this_scd();
+ u64 now_early, now_sched;
+
+ now_early = sched_clock_early();
+ now_sched = sched_clock();
+
+ __gtod_offset = now_early - scd->tick_gtod;
+ __sched_clock_offset = now_early - now_sched;
+
+ sched_clock_early_running = false;
+ static_branch_disable(&__use_sched_clock_early);
+
+ /* Now that early clock is finished, start regular sched clock */
+ sched_clock_init();
+}
+
+/*
+ * Notifies sched clock that an early boot clocksource is available, meaning
+ * that the current platform has implemented sched_clock_early().
+ *
+ * The early clock is running until we switch to a stable clock, or when we
+ * learn that the stable clock is not available.
+ */
+void __init sched_clock_early_init(void)
+{
+ sched_clock_early_running = true;
+}
+
#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
u64 sched_clock_cpu(int cpu)