[PATCH][RFC] printk: make pr_cont buffer per-cpu
From: Sergey Senozhatsky
Date: Mon Aug 22 2016 - 11:41:23 EST
Hello,
RFC and POC
Petr, I took a very quick look at your series [1]. I think it
won't work on some of the setups I'm toying with, where multiple CPUs
can do a simultaneous pr_cont() output. What do you about the following
approach?
(it's not really tested, I just finished writing the code. Will test it
more tomorrow. But I kinda like that at this point it removes almost
twice the code it adds, which is probably because I missed some cases).
===8<====
This patch changes pr_cont buffer to be a per-cpu variable, so
CPUs don't compete anymore for a single cont buffer. Thus we minimize
the possibility of preliminary/forced flushes of incomplete pr_cont
buffers.
The basic idea is that the first time CPU issues a pr_cont or
!LOG_NEWLINE output it starts a non-preemptible region which ends when
CPUs writes a NEW_LINE symbol.
Schematically:
pr_cont() /* preempt_disable() */
<< write to this_cpu_ptr cont buffer
pr_cont()
<< write to this_cpu_ptr cont buffer
...
pr_cont("\n") /* log_store(). preempt_enable() */
The only possible case (unless I'm missing something) for a
race is when IRQ is issuing a pr_cont() output, interrupting the CPU
which is already in the middle of pr_cont() printing.
[1]: http://marc.info/?l=linux-kernel&m=146860197621876
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>
---
kernel/printk/printk.c | 193 ++++++++++++++++---------------------------------
1 file changed, 64 insertions(+), 129 deletions(-)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 0d3e026..2bf8b85 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -337,10 +337,9 @@ static int console_may_schedule;
*/
enum log_flags {
- LOG_NOCONS = 1, /* already flushed, do not print to console */
- LOG_NEWLINE = 2, /* text ended with a newline */
- LOG_PREFIX = 4, /* text started with a prefix */
- LOG_CONT = 8, /* text is a fragment of a continuation line */
+ LOG_NEWLINE = 1, /* text ended with a newline */
+ LOG_PREFIX = 2, /* text started with a prefix */
+ LOG_CONT = 4, /* text is a fragment of a continuation line */
};
struct printk_log {
@@ -1661,108 +1660,62 @@ static inline void printk_delay(void)
* though, are printed immediately to the consoles to ensure everything has
* reached the console in case of a kernel crash.
*/
-static struct cont {
+struct cont {
char buf[LOG_LINE_MAX];
size_t len; /* length == 0 means unused buffer */
- size_t cons; /* bytes written to console */
- struct task_struct *owner; /* task of first print*/
u64 ts_nsec; /* time of first print */
u8 level; /* log level of first message */
u8 facility; /* log facility of first message */
enum log_flags flags; /* prefix, newline flags */
- bool flushed:1; /* buffer sealed and committed */
-} cont;
+};
+
+static DEFINE_PER_CPU(struct cont, pcpu_cont);
+static DEFINE_PER_CPU(bool, cont_printing);
-static void cont_flush(enum log_flags flags)
+static void cont_flush(struct cont *cont, enum log_flags flags)
{
- if (cont.flushed)
- return;
- if (cont.len == 0)
+ if (cont->len == 0)
return;
- if (cont.cons) {
- /*
- * If a fragment of this line was directly flushed to the
- * console; wait for the console to pick up the rest of the
- * line. LOG_NOCONS suppresses a duplicated output.
- */
- log_store(cont.facility, cont.level, flags | LOG_NOCONS,
- cont.ts_nsec, NULL, 0, cont.buf, cont.len);
- cont.flags = flags;
- cont.flushed = true;
- } else {
- /*
- * If no fragment of this line ever reached the console,
- * just submit it to the store and free the buffer.
- */
- log_store(cont.facility, cont.level, flags, 0,
- NULL, 0, cont.buf, cont.len);
- cont.len = 0;
- }
+ /*
+ * If no fragment of this line ever reached the console,
+ * just submit it to the store and free the buffer.
+ */
+ log_store(cont->facility, cont->level, flags, 0,
+ NULL, 0, cont->buf, cont->len);
+ cont->len = 0;
}
static bool cont_add(int facility, int level, const char *text, size_t len)
{
- if (cont.len && cont.flushed)
- return false;
+ struct cont *cont = this_cpu_ptr(&pcpu_cont);
/*
* If ext consoles are present, flush and skip in-kernel
* continuation. See nr_ext_console_drivers definition. Also, if
* the line gets too long, split it up in separate records.
*/
- if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) {
- cont_flush(LOG_CONT);
+ if (nr_ext_console_drivers || cont->len + len > sizeof(cont->buf)) {
+ cont_flush(cont, LOG_CONT);
return false;
}
- if (!cont.len) {
- cont.facility = facility;
- cont.level = level;
- cont.owner = current;
- cont.ts_nsec = local_clock();
- cont.flags = 0;
- cont.cons = 0;
- cont.flushed = false;
+ if (!cont->len) {
+ cont->facility = facility;
+ cont->level = level;
+ cont->ts_nsec = local_clock();
+ cont->flags = 0;
}
- memcpy(cont.buf + cont.len, text, len);
- cont.len += len;
+ memcpy(cont->buf + cont->len, text, len);
+ cont->len += len;
- if (cont.len > (sizeof(cont.buf) * 80) / 100)
- cont_flush(LOG_CONT);
+ if (cont->len > (sizeof(cont->buf) * 80) / 100)
+ cont_flush(cont, LOG_CONT);
return true;
}
-static size_t cont_print_text(char *text, size_t size)
-{
- size_t textlen = 0;
- size_t len;
-
- if (cont.cons == 0 && (console_prev & LOG_NEWLINE)) {
- textlen += print_time(cont.ts_nsec, text);
- size -= textlen;
- }
-
- len = cont.len - cont.cons;
- if (len > 0) {
- if (len+1 > size)
- len = size-1;
- memcpy(text + textlen, cont.buf + cont.cons, len);
- textlen += len;
- cont.cons = cont.len;
- }
-
- if (cont.flushed) {
- if (cont.flags & LOG_NEWLINE)
- text[textlen++] = '\n';
- /* got everything, release buffer */
- cont.len = 0;
- }
- return textlen;
-}
-
asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
@@ -1779,6 +1732,7 @@ asmlinkage int vprintk_emit(int facility, int level,
int printed_len = 0;
int nmi_message_lost;
bool in_sched = false;
+ struct cont *cont;
if (level == LOGLEVEL_SCHED) {
level = LOGLEVEL_DEFAULT;
@@ -1789,6 +1743,7 @@ asmlinkage int vprintk_emit(int facility, int level,
printk_delay();
local_irq_save(flags);
+ cont = this_cpu_ptr(&pcpu_cont);
this_cpu = smp_processor_id();
/*
@@ -1878,12 +1833,22 @@ asmlinkage int vprintk_emit(int facility, int level,
lflags |= LOG_PREFIX|LOG_NEWLINE;
if (!(lflags & LOG_NEWLINE)) {
+ if (!this_cpu_read(cont_printing)) {
+ bool unsafe_pr_cont = preemptible() &&
+ !rcu_preempt_depth();
+
+ if (unsafe_pr_cont) {
+ this_cpu_write(cont_printing, true);
+ preempt_disable();
+ }
+ }
+
/*
* Flush the conflicting buffer. An earlier newline was missing,
* or another task also prints continuation lines.
*/
- if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
- cont_flush(LOG_NEWLINE);
+ if (cont->len && (lflags & LOG_PREFIX))
+ cont_flush(cont, LOG_NEWLINE);
/* buffer line if possible, otherwise store it right away */
if (cont_add(facility, level, text, text_len))
@@ -1895,6 +1860,11 @@ asmlinkage int vprintk_emit(int facility, int level,
} else {
bool stored = false;
+ if (this_cpu_read(cont_printing)) {
+ this_cpu_write(cont_printing, false);
+ preempt_enable();
+ }
+
/*
* If an earlier newline was missing and it was the same task,
* either merge it with the current buffer and flush, or if
@@ -1903,11 +1873,11 @@ asmlinkage int vprintk_emit(int facility, int level,
* If the preceding printk was from a different task and missed
* a newline, flush and append the newline.
*/
- if (cont.len) {
- if (cont.owner == current && !(lflags & LOG_PREFIX))
+ if (cont->len) {
+ if (!(lflags & LOG_PREFIX))
stored = cont_add(facility, level, text,
text_len);
- cont_flush(LOG_NEWLINE);
+ cont_flush(cont, LOG_NEWLINE);
}
if (stored)
@@ -2051,7 +2021,6 @@ static struct cont {
size_t len;
size_t cons;
u8 level;
- bool flushed:1;
} cont;
static char *log_text(const struct printk_log *msg) { return NULL; }
static char *log_dict(const struct printk_log *msg) { return NULL; }
@@ -2344,42 +2313,6 @@ static inline int can_use_console(void)
return cpu_online(raw_smp_processor_id()) || have_callable_console();
}
-static void console_cont_flush(char *text, size_t size)
-{
- unsigned long flags;
- size_t len;
-
- raw_spin_lock_irqsave(&logbuf_lock, flags);
-
- if (!cont.len)
- goto out;
-
- if (suppress_message_printing(cont.level)) {
- cont.cons = cont.len;
- if (cont.flushed)
- cont.len = 0;
- goto out;
- }
-
- /*
- * We still queue earlier records, likely because the console was
- * busy. The earlier ones need to be printed before this one, we
- * did not flush any fragment so far, so just let it queue up.
- */
- if (console_seq < log_next_seq && !cont.cons)
- goto out;
-
- len = cont_print_text(text, size);
- raw_spin_unlock(&logbuf_lock);
- stop_critical_timings();
- call_console_drivers(cont.level, NULL, 0, text, len);
- start_critical_timings();
- local_irq_restore(flags);
- return;
-out:
- raw_spin_unlock_irqrestore(&logbuf_lock, flags);
-}
-
/**
* console_unlock - unlock the console system
*
@@ -2433,9 +2366,6 @@ again:
return;
}
- /* flush buffered message fragment immediately to console */
- console_cont_flush(text, sizeof(text));
-
for (;;) {
struct printk_log *msg;
size_t ext_len = 0;
@@ -2465,8 +2395,7 @@ skip:
msg = log_from_idx(console_idx);
level = msg->level;
- if ((msg->flags & LOG_NOCONS) ||
- suppress_message_printing(level)) {
+ if (suppress_message_printing(level)) {
/*
* Skip record we have buffered and already printed
* directly to the console when we received it, and
@@ -2474,12 +2403,6 @@ skip:
*/
console_idx = log_next(console_idx);
console_seq++;
- /*
- * We will get here again when we register a new
- * CON_PRINTBUFFER console. Clear the flag so we
- * will properly dump everything later.
- */
- msg->flags &= ~LOG_NOCONS;
console_prev = msg->flags;
goto skip;
}
@@ -2581,6 +2504,18 @@ void console_unblank(void)
*/
void console_flush_on_panic(void)
{
+ unsigned long flags;
+ unsigned int cpu;
+
+ /*
+ * Flush the cont messages; no matter complete
+ * or incomplete.
+ */
+ local_irq_save(flags);
+ for_each_online_cpu(cpu)
+ cont_flush(per_cpu_ptr(&pcpu_cont, cpu), LOG_NEWLINE);
+ local_irq_restore(flags);
+
/*
* If someone else is holding the console lock, trylock will fail
* and may_schedule may be set. Ignore and proceed to unlock so
--
2.9.3