[RFC][PATCHv2 2/8] printk: introduce printing kernel thread

From: Sergey Senozhatsky
Date: Wed Mar 29 2017 - 05:29:13 EST


printk() is quite complex internally and, basically, it does two
slightly independent things:
a) adds a new message to a kernel log buffer (log_store())
b) prints kernel log messages to serial consoles (console_unlock())

while (a) is guaranteed to be executed by printk(), (b) is not, for a
variety of reasons, and, unlike log_store(), it comes at a price:

1) console_unlock() attempts to flush all pending kernel log messages
to the console. Thus, it can loop indefinitely.

2) while console_unlock() is executed on one particular CPU, printing
pending kernel log messages, other CPUs can simultaneously append new
messages to the kernel log buffer.

3) the time it takes console_unlock() to print kernel messages also
depends on the speed of the console -- which may not be fast at all.

4) console_unlock() is executed in the same context as printk(), so
it may be non-preemptible/atomic, which makes 1)-3) dangerous.

As a result, nobody knows how long a printk() call will take, so
it's not really safe to call printk() in a number of situations,
including atomic context, RCU critical sections, interrupt context,
and more.

This patch introduces a dedicated printing kernel thread - printk_kthread.
The main purpose of this kthread is to offload printing to a non-atomic
and always scheduleable context, which eliminates 4) and makes 1)-3) less
critical. printk() now just appends log messages to the kernel log buffer
and wake_up()s printk_kthread instead of locking console_sem and calling
into potentially unsafe console_unlock().

This, however, is not always safe on its own. For example, we can't call
into the scheduler from panic(), because this may cause deadlock. That's
why we introduce a concept of printk_emergency() mode, when printk()
switches back to the old behaviour (console_unlock() from vprintk_emit())
in those cases. In general, this switch happens automatically once a EMERG
log level message appears in the log buffer. Another cases when wake_up()
might not work as expected are suspend, hibernate, etc. For those situations
we provide two new functions:
-- printk_emergency_begin()
Disables printk offloading. All printk() calls will attempt
to lock the console_sem and, if successful, flush kernel log
messages.

-- printk_emergency_end()
Enables printk offloading.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>
Signed-off-by: Jan Kara <jack@xxxxxxx>
---
include/linux/console.h | 3 ++
kernel/printk/printk.c | 107 ++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/include/linux/console.h b/include/linux/console.h
index 5949d1855589..f1a86944072e 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -187,6 +187,9 @@ extern bool console_suspend_enabled;
extern void suspend_console(void);
extern void resume_console(void);

+extern void printk_emergency_begin(void);
+extern void printk_emergency_end(void);
+
int mda_console_init(void);
void prom_con_init(void);

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 2d07678e9ff9..ab6b3b2a68c6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -48,6 +48,7 @@
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
+#include <linux/kthread.h>

#include <linux/uaccess.h>
#include <asm/sections.h>
@@ -402,7 +403,8 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
} while (0)

/*
- * Delayed printk version, for scheduler-internal messages:
+ * Used both for deferred printk version (scheduler-internal messages)
+ * and printk_kthread control.
*/
#define PRINTK_PENDING_WAKEUP 0x01
#define PRINTK_PENDING_OUTPUT 0x02
@@ -445,6 +447,42 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;

+static struct task_struct *printk_kthread __read_mostly;
+/*
+ * We can't call into the scheduler (wake_up() printk kthread) during
+ * suspend/kexec/etc. This temporarily switches printk to old behaviour.
+ */
+static atomic_t printk_emergency __read_mostly;
+/*
+ * Disable printk_kthread permanently. Unlike `oops_in_progress'
+ * it doesn't go back to 0.
+ */
+static bool printk_kthread_disabled __read_mostly;
+
+static inline bool printk_kthread_enabled(void)
+{
+ return !printk_kthread_disabled &&
+ printk_kthread && atomic_read(&printk_emergency) == 0;
+}
+
+/*
+ * This disables printing offloading and instead attempts
+ * to do the usual console_trylock()->console_unlock().
+ *
+ * Note, this does not stop the printk_kthread if it's already
+ * printing logbuf messages.
+ */
+void printk_emergency_begin(void)
+{
+ atomic_inc(&printk_emergency);
+}
+
+/* This re-enables printk_kthread offloading. */
+void printk_emergency_end(void)
+{
+ atomic_dec(&printk_emergency);
+}
+
/* Return log buffer address */
char *log_buf_addr_get(void)
{
@@ -1765,17 +1803,40 @@ asmlinkage int vprintk_emit(int facility, int level,

printed_len += log_output(facility, level, lflags, dict, dictlen, text, text_len);

+ /*
+ * Emergency level indicates that the system is unstable and, thus,
+ * we better stop relying on wake_up(printk_kthread) and try to do
+ * a direct printing.
+ */
+ if (level == LOGLEVEL_EMERG)
+ printk_kthread_disabled = true;
+
+ set_bit(PRINTK_PENDING_OUTPUT, &printk_pending);
logbuf_unlock_irqrestore(flags);

/* If called from the scheduler, we can not call up(). */
if (!in_sched) {
/*
- * Try to acquire and then immediately release the console
- * semaphore. The release will print out buffers and wake up
- * /dev/kmsg and syslog() users.
+ * Under heavy printing load/slow serial console/etc
+ * console_unlock() can stall CPUs, which can result in
+ * soft/hard-lockups, lost interrupts, RCU stalls, etc.
+ * Therefore we attempt to print the messages to console
+ * from a dedicated printk_kthread, which always runs in
+ * schedulable context.
*/
- if (console_trylock())
- console_unlock();
+ if (printk_kthread_enabled()) {
+ printk_safe_enter_irqsave(flags);
+ wake_up_process(printk_kthread);
+ printk_safe_exit_irqrestore(flags);
+ } else {
+ /*
+ * Try to acquire and then immediately release the
+ * console semaphore. The release will print out
+ * buffers and wake up /dev/kmsg and syslog() users.
+ */
+ if (console_trylock())
+ console_unlock();
+ }
}

return printed_len;
@@ -1882,6 +1943,9 @@ static size_t msg_print_text(const struct printk_log *msg,
bool syslog, char *buf, size_t size) { return 0; }
static bool suppress_message_printing(int level) { return false; }

+void printk_emergency_begin(void) {}
+void printk_emergency_end(void) {}
+
#endif /* CONFIG_PRINTK */

#ifdef CONFIG_EARLY_PRINTK
@@ -2164,6 +2228,13 @@ void console_unlock(void)
bool do_cond_resched, retry;

if (console_suspended) {
+ /*
+ * Avoid an infinite loop in printk_kthread function
+ * when console_unlock() cannot flush messages because
+ * we suspended consoles. Someone else will print the
+ * messages from resume_console().
+ */
+ clear_bit(PRINTK_PENDING_OUTPUT, &printk_pending);
up_console_sem();
return;
}
@@ -2182,6 +2253,7 @@ void console_unlock(void)
console_may_schedule = 0;

again:
+ clear_bit(PRINTK_PENDING_OUTPUT, &printk_pending);
/*
* We released the console_sem lock, so we need to recheck if
* cpu is online and (if not) is there at least one CON_ANYTIME
@@ -2664,8 +2736,11 @@ late_initcall(printk_late_init);
#if defined CONFIG_PRINTK
static void wake_up_klogd_work_func(struct irq_work *irq_work)
{
- if (test_and_clear_bit(PRINTK_PENDING_OUTPUT, &printk_pending)) {
- /* If trylock fails, someone else is doing the printing */
+ if (test_bit(PRINTK_PENDING_OUTPUT, &printk_pending)) {
+ /*
+ * If trylock fails, someone else is doing the printing.
+ * PRINTK_PENDING_OUTPUT bit is cleared by console_unlock().
+ */
if (console_trylock())
console_unlock();
}
@@ -2679,6 +2754,22 @@ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
.flags = IRQ_WORK_LAZY,
};

+static int printk_kthread_func(void *data)
+{
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!test_bit(PRINTK_PENDING_OUTPUT, &printk_pending))
+ schedule();
+
+ __set_current_state(TASK_RUNNING);
+
+ console_lock();
+ console_unlock();
+ }
+
+ return 0;
+}
+
void wake_up_klogd(void)
{
preempt_disable();
--
2.12.2