Re: [PATCH v3] printk: Have printk() never buffer its data

From: Kay Sievers
Date: Tue Jun 26 2012 - 12:07:44 EST


On Mon, 2012-06-25 at 18:40 -0700, Linus Torvalds wrote:
> On Mon, Jun 25, 2012 at 5:56 PM, Kay Sievers <kay@xxxxxxxx> wrote:
> >
> > Buffering has nice effects though:
> > It makes continuation lines appear as one record in the buffer, not as
> > n individual prints with n headers.
>
> As I already mentioned, buffering for *logging* is different from
> buffering for *printing*.
>
> I think it might be a great idea to buffer for logging in order to
> generate one individual buffer record there.
>
> But it needs to be printed as it is generated.

That's a good idea.

Something like this could work - only minimally tested at this moment.

Thanks,
Kay


From: Kay Sievers <kay@xxxxxxxx>
Subject: printk: flush continuation lines immediately to console

Continuation lines are buffered internally to merge the chunked
printk()s into a single record, and to isolate potentially racy
continuation users from usual terminated line users.

This though, has the effect that partial lines are not printed to
the console in the moment they are emitted. In case the kernel
crashes in the meantime, the potentially interesting printed
information will never reach the consoles.

Here we share the continuation buffer with the console copy logic,
and partial lines are immediately flushed to the consoles, but still
buffered internally to improve the integrity of the messages.
---

kernel/printk.c | 129 +++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 101 insertions(+), 28 deletions(-)

diff --git a/kernel/printk.c b/kernel/printk.c
index a2276b9..92c34bd1 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -193,12 +193,19 @@ static int console_may_schedule;
* separated by ',', and find the message after the ';' character.
*/

+enum log_flags {
+ LOG_DEFAULT = 0,
+ LOG_NOCONS = 1, /* already flushed, do not print to console */
+};
+
struct log {
u64 ts_nsec; /* timestamp in nanoseconds */
u16 len; /* length of entire record */
u16 text_len; /* length of text buffer */
u16 dict_len; /* length of dictionary buffer */
- u16 level; /* syslog level + facility */
+ u8 facility; /* syslog facility */
+ u8 flags:5; /* internal record flags */
+ u8 level:3; /* syslog level */
};

/*
@@ -286,6 +293,7 @@ static u32 log_next(u32 idx)

/* insert record into the buffer, discard old ones, update heads */
static void log_store(int facility, int level,
+ enum log_flags flags, u64 ts_nsec,
const char *dict, u16 dict_len,
const char *text, u16 text_len)
{
@@ -329,8 +337,13 @@ static void log_store(int facility, int level,
msg->text_len = text_len;
memcpy(log_dict(msg), dict, dict_len);
msg->dict_len = dict_len;
- msg->level = (facility << 3) | (level & 7);
- msg->ts_nsec = local_clock();
+ msg->facility = facility;
+ msg->level = level & 7;
+ msg->flags = flags & 0x1f;
+ if (ts_nsec > 0)
+ msg->ts_nsec = ts_nsec;
+ else
+ msg->ts_nsec = local_clock();
memset(log_dict(msg) + dict_len, 0, pad_len);
msg->len = sizeof(struct log) + text_len + dict_len + pad_len;

@@ -446,7 +459,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
ts_usec = msg->ts_nsec;
do_div(ts_usec, 1000);
len = sprintf(user->buf, "%u,%llu,%llu;",
- msg->level, user->seq, ts_usec);
+ (msg->facility << 3) | msg->level, user->seq, ts_usec);

/* escape non-printable characters */
for (i = 0; i < msg->text_len; i++) {
@@ -787,6 +800,21 @@ static bool printk_time;
#endif
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);

+static size_t print_time(u64 ts, char *buf)
+{
+ unsigned long rem_nsec;
+
+ if (!printk_time)
+ return 0;
+
+ if (!buf)
+ return 15;
+
+ rem_nsec = do_div(ts, 1000000000);
+ return sprintf(buf, "[%5lu.%06lu] ",
+ (unsigned long)ts, rem_nsec / 1000);
+}
+
static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
{
size_t len = 0;
@@ -803,18 +831,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
}
}

- if (printk_time) {
- if (buf) {
- unsigned long long ts = msg->ts_nsec;
- unsigned long rem_nsec = do_div(ts, 1000000000);
-
- len += sprintf(buf + len, "[%5lu.%06lu] ",
- (unsigned long) ts, rem_nsec / 1000);
- } else {
- len += 15;
- }
- }
-
+ len += print_time(msg->ts_nsec, buf ? buf + len : NULL);
return len;
}

@@ -1272,15 +1289,25 @@ static inline void printk_delay(void)
}
}

+/*
+ * Continuation lines are buffered, and not committed to the record buffer
+ * until the line is complete, or a race forces a flush. The line fragments
+ * though, are printed immediately to the consoles to ensure everything has
+ * reached the console in case of a kernel crash.
+ */
+static char cont_buf[LOG_LINE_MAX];
+static size_t cont_len;
+static size_t cont_cons;
+static size_t cont_cons_len;
+static int cont_level;
+static u64 cont_ts_nsec;
+static struct task_struct *cont_task;
+
asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
{
static int recursion_bug;
- static char cont_buf[LOG_LINE_MAX];
- static size_t cont_len;
- static int cont_level;
- static struct task_struct *cont_task;
static char textbuf[LOG_LINE_MAX];
char *text = textbuf;
size_t text_len;
@@ -1326,7 +1353,8 @@ asmlinkage int vprintk_emit(int facility, int level,
recursion_bug = 0;
printed_len += strlen(recursion_msg);
/* emit KERN_CRIT message */
- log_store(0, 2, NULL, 0, recursion_msg, printed_len);
+ log_store(0, 2, LOG_DEFAULT, 0,
+ NULL, 0, recursion_msg, printed_len);
}

/*
@@ -1369,13 +1397,17 @@ asmlinkage int vprintk_emit(int facility, int level,
* Flush earlier buffer, which is either from a
* different thread, or when we got a new prefix.
*/
- log_store(facility, cont_level, NULL, 0, cont_buf, cont_len);
+ log_store(facility, cont_level,
+ LOG_NOCONS, cont_ts_nsec,
+ NULL, 0, cont_buf, cont_len);
cont_len = 0;
}

if (!cont_len) {
+ cont_cons = 0;
cont_level = level;
cont_task = current;
+ cont_ts_nsec = local_clock();
}

/* buffer or append to earlier buffer from the same thread */
@@ -1383,6 +1415,8 @@ asmlinkage int vprintk_emit(int facility, int level,
text_len = sizeof(cont_buf) - cont_len;
memcpy(cont_buf + cont_len, text, text_len);
cont_len += text_len;
+ cont_cons_len = cont_len;
+ printed_len = text_len;
} else {
if (cont_len && cont_task == current) {
if (prefix) {
@@ -1392,6 +1426,7 @@ asmlinkage int vprintk_emit(int facility, int level,
* with an interrupt.
*/
log_store(facility, cont_level,
+ LOG_NOCONS, cont_ts_nsec,
NULL, 0, cont_buf, cont_len);
cont_len = 0;
}
@@ -1401,14 +1436,15 @@ asmlinkage int vprintk_emit(int facility, int level,
text_len = sizeof(cont_buf) - cont_len;
memcpy(cont_buf + cont_len, text, text_len);
cont_len += text_len;
+ cont_cons_len = cont_len;
log_store(facility, cont_level,
+ LOG_NOCONS, cont_ts_nsec,
NULL, 0, cont_buf, cont_len);
cont_len = 0;
- cont_task = NULL;
- printed_len = cont_len;
+ printed_len = text_len;
} else {
/* ordinary single and terminated line */
- log_store(facility, level,
+ log_store(facility, level, LOG_DEFAULT, 0,
dict, dictlen, text, text_len);
printed_len = text_len;
}
@@ -1795,6 +1831,7 @@ static u32 console_idx;
*/
void console_unlock(void)
{
+ static char text[LOG_LINE_MAX];
static u64 seen_seq;
unsigned long flags;
bool wake_klogd = false;
@@ -1807,10 +1844,37 @@ void console_unlock(void)

console_may_schedule = 0;

+ /* flush buffered message fragment immediately to console */
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
+ if (cont_cons_len && (cont_cons < cont_cons_len || !cont_len)) {
+ size_t textlen = 0;
+ size_t len;
+
+ len = cont_cons_len - cont_cons;
+ if (!cont_cons)
+ textlen = print_time(cont_ts_nsec, text);
+ memcpy(text + textlen, cont_buf + cont_cons, len);
+ textlen += len;
+ cont_cons = cont_cons_len;
+
+ if (!cont_len) {
+ /* last chunk of line; terminate */
+ text[textlen++] = '\n';
+ cont_cons_len = 0;
+ }
+ raw_spin_unlock(&logbuf_lock);
+
+ stop_critical_timings();
+ call_console_drivers(cont_level, text, textlen);
+ start_critical_timings();
+
+ local_irq_restore(flags);
+ } else
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+
again:
for (;;) {
struct log *msg;
- static char text[LOG_LINE_MAX];
size_t len;
int level;

@@ -1825,13 +1889,22 @@ again:
console_seq = log_first_seq;
console_idx = log_first_idx;
}
-
+skip:
if (console_seq == log_next_seq)
break;

msg = log_from_idx(console_idx);
- level = msg->level & 7;
+ if (msg->flags & LOG_NOCONS) {
+ /*
+ * Skip record we have buffered and already printed
+ * directly to the console when we received it.
+ */
+ console_idx = log_next(console_idx);
+ console_seq++;
+ goto skip;
+ }

+ level = msg->level;
len = msg_print_text(msg, false, text, sizeof(text));

console_idx = log_next(console_idx);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/