Re: [PATCH] printk, allow different timestamps for printk.time [v2]

From: Vasily Averin
Date: Thu Jan 28 2016 - 07:53:14 EST


Dear Prarit,

I have no objections about your patch,
bit in fact I doubt we really need to convert each timestamp in kernel logs.
How do you think is it probably better to convert only one timestamp per screen ?
I.e. convert it in each 25th string only?
Or just do it once per N seconds?
And do not replace original timestamp but add converted one?

Thank you,
Vasily Averin

On 28.01.2016 15:43, Prarit Bhargava wrote:
> Over the past years I've seen many reports of bugs that include
> time-stamped kernel logs (enabled when CONFIG_PRINTK_TIME=y or
> print.time=1 is specified as a kernel parameter) that do not align
> with either external time stamped logs or /var/log/messages. This
> also makes determining the time of a failure difficult in cases where
> /var/log/messages is unavailable.
>
> For example,
>
> [root@intel-wildcatpass-06 ~]# date; echo "Hello!" > /dev/kmsg ; date
> Thu Dec 17 13:58:31 EST 2015
> Thu Dec 17 13:58:31 EST 2015
>
> which displays
>
> [83973.768912] Hello!
>
> on the serial console.
>
> Running a script to convert this to the stamped time,
>
> [root@intel-wildcatpass-06 ~]# ./human.sh | tail -1
> [Thu Dec 17 13:59:57 2015] Hello!
>
> which is already off by 1 minute and 26 seconds off after ~24 hours of
> uptime.
>
> This occurs because the time stamp is obtained from a call to
> local_clock() which (on x86) is a direct call to the hardware. These
> hardware clock reads are not modified by the standard ntp or ptp protocol,
> while the other timestamps are, and that results in situations external
> time sources are further and further offset from the kernel log
> timestamps.
>
> This patch introduces printk.time=[0-3] allowing a user to specify an adjusted
> clock to use with printk timestamps. The hardware clock, or the existing
> functionality, is preserved by default.
>
> Real clock & 32-bit systems: Selecting the real clock printk timestamp may
> lead to unlikely situations where a timestamp is wrong because the real time
> offset is read without the protection of a sequence lock in the call to
> ktime_get_log_ts() in printk_get_ts().
>
> [v2]: use NMI safe timekeeping access functions
> [v3]: use tglx's ktime_get_log_ts() suggestion to get monotonic & real clocks
> [v4]: Fix kernel parameters
>
> Cc: John Stultz <john.stultz@xxxxxxxxxx>
> Cc: Xunlei Pang <pang.xunlei@xxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Baolin Wang <baolin.wang@xxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
> Cc: Petr Mladek <pmladek@xxxxxxx>
> Cc: Tejun Heo <tj@xxxxxxxxxx>
> Cc: Peter Hurley <peter@xxxxxxxxxxxxxxxxxx>
> Cc: Vasily Averin <vvs@xxxxxxxxxxxxx>
> Cc: Joe Perches <joe@xxxxxxxxxxx>
> Signed-off-by: Prarit Bhargava <prarit@xxxxxxxxxx>
> ---
> Documentation/kernel-parameters.txt | 6 ++-
> include/linux/timekeeping.h | 1 +
> kernel/printk/printk.c | 79 +++++++++++++++++++++++++++++++++--
> kernel/time/timekeeping.c | 13 ++++++
> lib/Kconfig.debug | 2 +-
> 5 files changed, 94 insertions(+), 7 deletions(-)
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 87d40a7..32f8dba 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -3060,8 +3060,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
> Format: <bool> (1/Y/y=enable, 0/N/n=disable)
> default: disabled
>
> - printk.time= Show timing data prefixed to each printk message line
> - Format: <bool> (1/Y/y=enable, 0/N/n=disable)
> + printk.time= Show timestamp prefixed to each printk message line
> + Format: <string>
> + (0/N/n = disable, 1/Y/y = local clock,
> + 2 = monotonic clock, 3 = real clock)
>
> processor.max_cstate= [HW,ACPI]
> Limit processor to maximum C-state
> diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
> index ec89d84..0b99a84 100644
> --- a/include/linux/timekeeping.h
> +++ b/include/linux/timekeeping.h
> @@ -233,6 +233,7 @@ static inline u64 ktime_get_raw_ns(void)
>
> extern u64 ktime_get_mono_fast_ns(void);
> extern u64 ktime_get_raw_fast_ns(void);
> +extern u64 ktime_get_log_ts(u64 *offset_real);
>
> /*
> * Timespec interfaces utilizing the ktime based ones
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index c963ba5..19ec0d1 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -420,6 +420,8 @@ static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len,
> return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len);
> }
>
> +static u64 printk_get_ts(void);
> +
> /* insert record into the buffer, discard old ones, update heads */
> static int log_store(int facility, int level,
> enum log_flags flags, u64 ts_nsec,
> @@ -468,7 +470,7 @@ static int log_store(int facility, int level,
> if (ts_nsec > 0)
> msg->ts_nsec = ts_nsec;
> else
> - msg->ts_nsec = local_clock();
> + msg->ts_nsec = printk_get_ts();
> memset(log_dict(msg) + dict_len, 0, pad_len);
> msg->len = size;
>
> @@ -1035,8 +1037,77 @@ static inline void boot_delay_msec(int level)
> }
> #endif
>
> -static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
> -module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
> +static int printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
> +
> +/*
> + * Real clock & 32-bit systems: Selecting the real clock printk timestamp may
> + * lead to unlikely situations where a timestamp is wrong because the real time
> + * offset is read without the protection of a sequence lock in the call to
> + * ktime_get_log_ts() in printk_get_ts() below.
> + */
> +static int printk_time_param_set(const char *val,
> + const struct kernel_param *kp)
> +{
> + char *param = strstrip((char *)val);
> +
> + if (strlen(param) != 1)
> + return -EINVAL;
> +
> + switch (param[0]) {
> + /* 0/N/n = disabled */
> + case '0':
> + case 'N':
> + case 'n':
> + printk_time = 0;
> + break;
> + /* 1/Y/y = local clock */
> + case '1':
> + case 'Y':
> + case 'y':
> + printk_time = 1;
> + break;
> + /* 2 = monotonic clock */
> + case '2':
> + printk_time = 2;
> + break;
> + /* 3 = real clock */
> + case '3':
> + printk_time = 3;
> + break;
> + default:
> + pr_warn("printk: invalid timestamp value\n");
> + return -EINVAL;
> + break;
> + }
> +
> + pr_info("printk: timestamp set to %d.\n", printk_time);
> + return 0;
> +}
> +
> +static struct kernel_param_ops printk_time_param_ops = {
> + .set = printk_time_param_set,
> + .get = param_get_int,
> +};
> +
> +module_param_cb(time, &printk_time_param_ops, &printk_time, S_IRUGO | S_IWUSR);
> +
> +static u64 printk_get_ts(void)
> +{
> + u64 mono, offset_real;
> +
> + if (printk_time == 0)
> + return 0;
> +
> + if (printk_time == 1)
> + return local_clock();
> +
> + mono = ktime_get_log_ts(&offset_real);
> +
> + if (printk_time == 2)
> + return mono;
> +
> + return mono + offset_real;
> +}
>
> static size_t print_time(u64 ts, char *buf)
> {
> @@ -1614,7 +1685,7 @@ static bool cont_add(int facility, int level, const char *text, size_t len)
> cont.facility = facility;
> cont.level = level;
> cont.owner = current;
> - cont.ts_nsec = local_clock();
> + cont.ts_nsec = printk_get_ts();
> cont.flags = 0;
> cont.cons = 0;
> cont.flushed = false;
> diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
> index 34b4ced..698e1a2 100644
> --- a/kernel/time/timekeeping.c
> +++ b/kernel/time/timekeeping.c
> @@ -43,6 +43,7 @@ static struct {
>
> static DEFINE_RAW_SPINLOCK(timekeeper_lock);
> static struct timekeeper shadow_timekeeper;
> +static int timekeeping_active;
>
> /**
> * struct tk_fast - NMI safe timekeeper
> @@ -401,6 +402,16 @@ u64 ktime_get_raw_fast_ns(void)
> }
> EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
>
> +u64 ktime_get_log_ts(u64 *offset_real)
> +{
> + *offset_real = ktime_to_ns(tk_core.timekeeper.offs_real);
> +
> + if (timekeeping_active)
> + return ktime_get_mono_fast_ns();
> + else
> + return local_clock();
> +}
> +
> /* Suspend-time cycles value for halted fast timekeeper. */
> static cycle_t cycles_at_suspend;
>
> @@ -1267,6 +1278,8 @@ void __init timekeeping_init(void)
>
> write_seqcount_end(&tk_core.seq);
> raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
> +
> + timekeeping_active = 1;
> }
>
> /* time in seconds when suspend began for persistent clock */
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index ecb9e75..6d0ddb0 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -13,7 +13,7 @@ config PRINTK_TIME
> be included, not that the timestamp is recorded.
>
> The behavior is also controlled by the kernel command line
> - parameter printk.time=1. See Documentation/kernel-parameters.txt
> + parameter printk.time=[0-3]. See Documentation/kernel-parameters.txt
>
> config MESSAGE_LOGLEVEL_DEFAULT
> int "Default message log level (1-7)"
>