[PATCH 2/2] printk, Add printk.clock kernel parameter
From: Prarit Bhargava
Date: Wed Jan 06 2016 - 08:01:15 EST
The script used in the analysis below:
dmesg_with_human_timestamps () {
$(type -P dmesg) "$@" | perl -w -e 'use strict;
my ($uptime) = do { local @ARGV="/proc/uptime";<>}; ($uptime) = ($uptime =~ /^(\d+)\./);
foreach my $line (<>) {
printf( ($line=~/^\[\s*(\d+)\.\d+\](.+)/) ? ( "[%s]%s\n", scalar localtime(time - $uptime + $1), $2 ) : $line )
}'
}
dmesg_with_human_timestamps
----8<----
Over the past years I've seen many reports of bugs that include
time-stamped kernel logs (enabled when CONFIG_PRINTK_TIME=y or
print.time=1 is specified as a kernel parameter) that do not align
with either external time stamped logs or /var/log/messages.
For example,
[root@intel-wildcatpass-06 ~]# date; echo "Hello!" > /dev/kmsg ; date
Thu Dec 17 13:58:31 EST 2015
Thu Dec 17 13:58:31 EST 2015
which displays
[83973.768912] Hello!
on the serial console.
Running a script to convert this to "boot time",
[root@intel-wildcatpass-06 ~]# ./human.sh | tail -1
[Thu Dec 17 13:59:57 2015] Hello!
which is already off by 1 minute and 26 seconds off after ~24 hours of
uptime.
This occurs because the time stamp is obtained from a call to
local_clock() which (on x86) is a direct call to the hardware. These
hardware clock reads are not modified by the standard ntp or ptp protocol,
while the other timestamps are, and that results in situations where
/var/log/messages timestamps are further and further offset from the
hardware clock timestamps.
This patch introduces printk.clock=[local|boot|real|tai] allowing a
user to specify an adjusted clock to use with printk timestamps. The
hardware clock, or the existing functionality, is preserved by default.
If the timekeeper_lock (used to protect writes from reads) is under
contention, the last known good time is output again and "??" is used as
the last two digits of time. This avoids contention issues with a CPU
panicking while another CPU is writing time data.
Cc: John Stultz <john.stultz@xxxxxxxxxx>
Cc: Xunlei Pang <pang.xunlei@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Baolin Wang <baolin.wang@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: Petr Mladek <pmladek@xxxxxxx>
Cc: Tejun Heo <tj@xxxxxxxxxx>
Cc: Peter Hurley <peter@xxxxxxxxxxxxxxxxxx>
Cc: Vasily Averin <vvs@xxxxxxxxxxxxx>
Cc: Joe Perches <joe@xxxxxxxxxxx>
Signed-off-by: Prarit Bhargava <prarit@xxxxxxxxxx>
---
include/linux/time64.h | 2 +
kernel/printk/printk.c | 105 ++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 99 insertions(+), 8 deletions(-)
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 367d5af..f4a35d9 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -39,6 +39,8 @@ struct itimerspec64 {
#define TIME64_MAX ((s64)~((u64)1 << 63))
#define KTIME_MAX ((s64)~((u64)1 << 63))
#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
+/* Maximum value returned by ktime_to_ns() */
+#define KTIME_MAX_NS ((u64)((U64_MAX>>1)))
#if __BITS_PER_LONG == 64
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 2ce8826..ffc05ee 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -419,6 +419,83 @@ static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len,
return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len);
}
+static u64 printk_try_real_ns(void)
+{
+ return ktime_try_real_ns();
+}
+
+static u64 printk_try_boot_ns(void)
+{
+ return ktime_try_boot_ns();
+}
+
+static u64 printk_try_tai_ns(void)
+{
+ return ktime_try_tai_ns();
+}
+
+/* default to local clock */
+static u64 last_time_stamp;
+static char *printk_clock = "local";
+static u64 (*printk_clock_fn)(void) = &local_clock;
+
+static u64 printk_get_timestamp(void)
+{
+ u64 ret = printk_clock_fn();
+
+ if (likely(ret != KTIME_MAX_NS))
+ last_time_stamp = ret;
+ return ret;
+}
+
+static int printk_clock_param_set(const char *val,
+ const struct kernel_param *kp)
+{
+ char *printk_clock_new = strstrip((char *)val);
+ int ret;
+
+ /* check if change is needed */
+ if (!strcmp(printk_clock_new, printk_clock))
+ return 0;
+
+ if (!strncmp("local", printk_clock_new, 5)) {
+ ret = param_set_charp(printk_clock_new, kp);
+ if (ret)
+ return ret;
+ printk_clock_fn = &local_clock;
+ } else if (!strncmp("real", printk_clock_new, 4)) {
+ ret = param_set_charp(printk_clock_new, kp);
+ if (ret)
+ return ret;
+ printk_clock_fn = &printk_try_real_ns;
+ } else if (!strncmp("boot", printk_clock_new, 4)) {
+ ret = param_set_charp(printk_clock_new, kp);
+ if (ret)
+ return ret;
+ printk_clock_fn = &printk_try_boot_ns;
+ } else if (!strncmp("tai", printk_clock_new, 3)) {
+ ret = param_set_charp(printk_clock_new, kp);
+ if (ret)
+ return ret;
+ printk_clock_fn = &printk_try_tai_ns;
+ }
+
+ /* Did the clock change ? */
+ if (strcmp(printk_clock_new, printk_clock))
+ return -EINVAL;
+
+ pr_info("printk: timestamp set to %s clock.\n", printk_clock);
+ return 0;
+}
+
+static struct kernel_param_ops printk_clock_param_ops = {
+ .set = printk_clock_param_set,
+ .get = param_get_charp,
+};
+
+module_param_cb(clock, &printk_clock_param_ops, &printk_clock, 0644);
+MODULE_PARM_DESC(clock, "Clock used for printk timestamps. Can be local (hardware/default), boot, real, or tai.\n");
+
/* insert record into the buffer, discard old ones, update heads */
static int log_store(int facility, int level,
enum log_flags flags, u64 ts_nsec,
@@ -467,7 +544,7 @@ static int log_store(int facility, int level,
if (ts_nsec > 0)
msg->ts_nsec = ts_nsec;
else
- msg->ts_nsec = local_clock();
+ msg->ts_nsec = printk_get_timestamp();
memset(log_dict(msg) + dict_len, 0, pad_len);
msg->len = size;
@@ -1039,18 +1116,30 @@ module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
static size_t print_time(u64 ts, char *buf)
{
- unsigned long rem_nsec;
+ unsigned long rem;
+ size_t size;
if (!printk_time)
return 0;
- rem_nsec = do_div(ts, 1000000000);
+ if (ts == KTIME_MAX_NS)
+ ts = last_time_stamp;
- if (!buf)
- return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts);
+ if (buf) {
+ size = INT_MAX;
+ rem = do_div(ts, 1000000000) / 1000;
+ } else {
+ size = 0;
+ rem = 0;
+ }
- return sprintf(buf, "[%5lu.%06lu] ",
- (unsigned long)ts, rem_nsec / 1000);
+ if (ts != KTIME_MAX_NS) {
+ return snprintf(buf, size, "[%5lu.%06lu] ",
+ (unsigned long)ts, rem);
+ }
+ /* return last_time_stamp with ?? warning */
+ return snprintf(buf, size, "[%5lu.%04lu??] ",
+ (unsigned long)ts, rem);
}
static size_t print_prefix(const struct printk_log *msg, bool syslog, char *buf)
@@ -1613,7 +1702,7 @@ static bool cont_add(int facility, int level, const char *text, size_t len)
cont.facility = facility;
cont.level = level;
cont.owner = current;
- cont.ts_nsec = local_clock();
+ cont.ts_nsec = printk_get_timestamp();
cont.flags = 0;
cont.cons = 0;
cont.flushed = false;
--
1.7.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/