Re: [PATCH v3] printk: Have printk() never buffer its data

From: Steven Rostedt
Date: Wed Jun 27 2012 - 11:13:58 EST


On Tue, 2012-06-26 at 18:07 +0200, Kay Sievers wrote:

> From: Kay Sievers <kay@xxxxxxxx>
> Subject: printk: flush continuation lines immediately to console
>
> Continuation lines are buffered internally to merge the chunked
> printk()s into a single record, and to isolate potentially racy
> continuation users from usual terminated line users.
>
> This though, has the effect that partial lines are not printed to
> the console in the moment they are emitted. In case the kernel
> crashes in the meantime, the potentially interesting printed
> information will never reach the consoles.
>
> Here we share the continuation buffer with the console copy logic,
> and partial lines are immediately flushed to the consoles, but still
> buffered internally to improve the integrity of the messages.

I just tested this patch and it boots fine for me. Better still, I put
in my 'delay' to make sure the output shows up as it is printed, and it
seems to do so.

Tested-by: Steven Rostedt <rostedt@xxxxxxxxxxx>

-- Steve

> ---
>
> kernel/printk.c | 129 +++++++++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 101 insertions(+), 28 deletions(-)
>
> diff --git a/kernel/printk.c b/kernel/printk.c
> index a2276b9..92c34bd1 100644
> --- a/kernel/printk.c
> +++ b/kernel/printk.c
> @@ -193,12 +193,19 @@ static int console_may_schedule;
> * separated by ',', and find the message after the ';' character.
> */
>
> +enum log_flags {
> + LOG_DEFAULT = 0,
> + LOG_NOCONS = 1, /* already flushed, do not print to console */
> +};
> +
> struct log {
> u64 ts_nsec; /* timestamp in nanoseconds */
> u16 len; /* length of entire record */
> u16 text_len; /* length of text buffer */
> u16 dict_len; /* length of dictionary buffer */
> - u16 level; /* syslog level + facility */
> + u8 facility; /* syslog facility */
> + u8 flags:5; /* internal record flags */
> + u8 level:3; /* syslog level */
> };
>
> /*
> @@ -286,6 +293,7 @@ static u32 log_next(u32 idx)
>
> /* insert record into the buffer, discard old ones, update heads */
> static void log_store(int facility, int level,
> + enum log_flags flags, u64 ts_nsec,
> const char *dict, u16 dict_len,
> const char *text, u16 text_len)
> {
> @@ -329,8 +337,13 @@ static void log_store(int facility, int level,
> msg->text_len = text_len;
> memcpy(log_dict(msg), dict, dict_len);
> msg->dict_len = dict_len;
> - msg->level = (facility << 3) | (level & 7);
> - msg->ts_nsec = local_clock();
> + msg->facility = facility;
> + msg->level = level & 7;
> + msg->flags = flags & 0x1f;
> + if (ts_nsec > 0)
> + msg->ts_nsec = ts_nsec;
> + else
> + msg->ts_nsec = local_clock();
> memset(log_dict(msg) + dict_len, 0, pad_len);
> msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
>
> @@ -446,7 +459,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
> ts_usec = msg->ts_nsec;
> do_div(ts_usec, 1000);
> len = sprintf(user->buf, "%u,%llu,%llu;",
> - msg->level, user->seq, ts_usec);
> + (msg->facility << 3) | msg->level, user->seq, ts_usec);
>
> /* escape non-printable characters */
> for (i = 0; i < msg->text_len; i++) {
> @@ -787,6 +800,21 @@ static bool printk_time;
> #endif
> module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
>
> +static size_t print_time(u64 ts, char *buf)
> +{
> + unsigned long rem_nsec;
> +
> + if (!printk_time)
> + return 0;
> +
> + if (!buf)
> + return 15;
> +
> + rem_nsec = do_div(ts, 1000000000);
> + return sprintf(buf, "[%5lu.%06lu] ",
> + (unsigned long)ts, rem_nsec / 1000);
> +}
> +
> static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
> {
> size_t len = 0;
> @@ -803,18 +831,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
> }
> }
>
> - if (printk_time) {
> - if (buf) {
> - unsigned long long ts = msg->ts_nsec;
> - unsigned long rem_nsec = do_div(ts, 1000000000);
> -
> - len += sprintf(buf + len, "[%5lu.%06lu] ",
> - (unsigned long) ts, rem_nsec / 1000);
> - } else {
> - len += 15;
> - }
> - }
> -
> + len += print_time(msg->ts_nsec, buf ? buf + len : NULL);
> return len;
> }
>
> @@ -1272,15 +1289,25 @@ static inline void printk_delay(void)
> }
> }
>
> +/*
> + * Continuation lines are buffered, and not committed to the record buffer
> + * until the line is complete, or a race forces a flush. The line fragments
> + * though, are printed immediately to the consoles to ensure everything has
> + * reached the console in case of a kernel crash.
> + */
> +static char cont_buf[LOG_LINE_MAX];
> +static size_t cont_len;
> +static size_t cont_cons;
> +static size_t cont_cons_len;
> +static int cont_level;
> +static u64 cont_ts_nsec;
> +static struct task_struct *cont_task;
> +
> asmlinkage int vprintk_emit(int facility, int level,
> const char *dict, size_t dictlen,
> const char *fmt, va_list args)
> {
> static int recursion_bug;
> - static char cont_buf[LOG_LINE_MAX];
> - static size_t cont_len;
> - static int cont_level;
> - static struct task_struct *cont_task;
> static char textbuf[LOG_LINE_MAX];
> char *text = textbuf;
> size_t text_len;
> @@ -1326,7 +1353,8 @@ asmlinkage int vprintk_emit(int facility, int level,
> recursion_bug = 0;
> printed_len += strlen(recursion_msg);
> /* emit KERN_CRIT message */
> - log_store(0, 2, NULL, 0, recursion_msg, printed_len);
> + log_store(0, 2, LOG_DEFAULT, 0,
> + NULL, 0, recursion_msg, printed_len);
> }
>
> /*
> @@ -1369,13 +1397,17 @@ asmlinkage int vprintk_emit(int facility, int level,
> * Flush earlier buffer, which is either from a
> * different thread, or when we got a new prefix.
> */
> - log_store(facility, cont_level, NULL, 0, cont_buf, cont_len);
> + log_store(facility, cont_level,
> + LOG_NOCONS, cont_ts_nsec,
> + NULL, 0, cont_buf, cont_len);
> cont_len = 0;
> }
>
> if (!cont_len) {
> + cont_cons = 0;
> cont_level = level;
> cont_task = current;
> + cont_ts_nsec = local_clock();
> }
>
> /* buffer or append to earlier buffer from the same thread */
> @@ -1383,6 +1415,8 @@ asmlinkage int vprintk_emit(int facility, int level,
> text_len = sizeof(cont_buf) - cont_len;
> memcpy(cont_buf + cont_len, text, text_len);
> cont_len += text_len;
> + cont_cons_len = cont_len;
> + printed_len = text_len;
> } else {
> if (cont_len && cont_task == current) {
> if (prefix) {
> @@ -1392,6 +1426,7 @@ asmlinkage int vprintk_emit(int facility, int level,
> * with an interrupt.
> */
> log_store(facility, cont_level,
> + LOG_NOCONS, cont_ts_nsec,
> NULL, 0, cont_buf, cont_len);
> cont_len = 0;
> }
> @@ -1401,14 +1436,15 @@ asmlinkage int vprintk_emit(int facility, int level,
> text_len = sizeof(cont_buf) - cont_len;
> memcpy(cont_buf + cont_len, text, text_len);
> cont_len += text_len;
> + cont_cons_len = cont_len;
> log_store(facility, cont_level,
> + LOG_NOCONS, cont_ts_nsec,
> NULL, 0, cont_buf, cont_len);
> cont_len = 0;
> - cont_task = NULL;
> - printed_len = cont_len;
> + printed_len = text_len;
> } else {
> /* ordinary single and terminated line */
> - log_store(facility, level,
> + log_store(facility, level, LOG_DEFAULT, 0,
> dict, dictlen, text, text_len);
> printed_len = text_len;
> }
> @@ -1795,6 +1831,7 @@ static u32 console_idx;
> */
> void console_unlock(void)
> {
> + static char text[LOG_LINE_MAX];
> static u64 seen_seq;
> unsigned long flags;
> bool wake_klogd = false;
> @@ -1807,10 +1844,37 @@ void console_unlock(void)
>
> console_may_schedule = 0;
>
> + /* flush buffered message fragment immediately to console */
> + raw_spin_lock_irqsave(&logbuf_lock, flags);
> + if (cont_cons_len && (cont_cons < cont_cons_len || !cont_len)) {
> + size_t textlen = 0;
> + size_t len;
> +
> + len = cont_cons_len - cont_cons;
> + if (!cont_cons)
> + textlen = print_time(cont_ts_nsec, text);
> + memcpy(text + textlen, cont_buf + cont_cons, len);
> + textlen += len;
> + cont_cons = cont_cons_len;
> +
> + if (!cont_len) {
> + /* last chunk of line; terminate */
> + text[textlen++] = '\n';
> + cont_cons_len = 0;
> + }
> + raw_spin_unlock(&logbuf_lock);
> +
> + stop_critical_timings();
> + call_console_drivers(cont_level, text, textlen);
> + start_critical_timings();
> +
> + local_irq_restore(flags);
> + } else
> + raw_spin_unlock_irqrestore(&logbuf_lock, flags);
> +
> again:
> for (;;) {
> struct log *msg;
> - static char text[LOG_LINE_MAX];
> size_t len;
> int level;
>
> @@ -1825,13 +1889,22 @@ again:
> console_seq = log_first_seq;
> console_idx = log_first_idx;
> }
> -
> +skip:
> if (console_seq == log_next_seq)
> break;
>
> msg = log_from_idx(console_idx);
> - level = msg->level & 7;
> + if (msg->flags & LOG_NOCONS) {
> + /*
> + * Skip record we have buffered and already printed
> + * directly to the console when we received it.
> + */
> + console_idx = log_next(console_idx);
> + console_seq++;
> + goto skip;
> + }
>
> + level = msg->level;
> len = msg_print_text(msg, false, text, sizeof(text));
>
> console_idx = log_next(console_idx);
>


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/