Re: [PATCH] printk/nmi: restore printk_func in nmi_panic

From: Sergey Senozhatsky
Date: Tue Mar 01 2016 - 04:26:36 EST


Hello,

On (02/29/16 20:19), Sergey Senozhatsky wrote:
[..]
> > That is the problem. zap_locks() is not a solution.
> >
> > First, it handles only logbuf_lock and console_sem. There are other
> > locks used by particular consoles that might cause a deadlock.
>
> yes, well, that's true for panic() in general.

Petr, what do you think of this (added PRINTK_NMI_FLUSH_ON_PANIC)?

1) zap_locks() in console_flush_on_panic()
2) add the PRINTK_NMI_FLUSH_ON_PANIC Kconfig symbol
3) add printk_nmi_flush_on_panic()

---
include/linux/printk.h | 2 ++
init/Kconfig | 9 +++++++++
kernel/printk/nmi.c | 31 ++++++++++++++++++++++---------
kernel/printk/printk.c | 27 ++++++++++++++++-----------
4 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 51dd6b8..19d52d4 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -127,11 +127,13 @@ extern void printk_nmi_init(void);
extern void printk_nmi_enter(void);
extern void printk_nmi_exit(void);
extern void printk_nmi_flush(void);
+extern void printk_nmi_flush_on_panic(void);
#else
static inline void printk_nmi_init(void) { }
static inline void printk_nmi_enter(void) { }
static inline void printk_nmi_exit(void) { }
static inline void printk_nmi_flush(void) { }
+static inline void printk_nmi_flush_on_panic(void) { }
#endif /* PRINTK_NMI */

#ifdef CONFIG_PRINTK
diff --git a/init/Kconfig b/init/Kconfig
index 651ec15..05921a6 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -883,6 +883,15 @@ config NMI_LOG_BUF_SHIFT
13 => 8 KB for each CPU
12 => 4 KB for each CPU

+config PRINTK_NMI_FLUSH_ON_PANIC
+ bool "Try to flush NMI logs to consoles on panic"
+ depends on PRINTK_NMI
+ help
+ Attempt to flush (printk) NMI per-cpu logs to consoles on panic.
+ This may be risky and may end up in a deadlock; printk will attempt
+ to zap its locks to reduce this possibility, but a deadlock is still
+ possible.
+
#
# Architectures with an unreliable sched_clock() should select this:
#
diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c
index 98e9e80..9da8be3 100644
--- a/kernel/printk/nmi.c
+++ b/kernel/printk/nmi.c
@@ -52,6 +52,15 @@ struct nmi_seq_buf {
static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);

/*
+ * The lock has two functions. First, one reader has to flush all
+ * available messages to make the lockless synchronization with
+ * writers easier. Second, we do not want to mix messages from
+ * different CPUs. This is especially important when printing
+ * a backtrace.
+ */
+static DEFINE_RAW_SPINLOCK(read_lock);
+
+/*
* Safe printk() for NMI context. It uses a per-CPU buffer to
* store the message. NMIs are not nested, so there is always only
* one writer running. But the buffer might get flushed from another
@@ -115,19 +124,10 @@ static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end)
*/
static void __printk_nmi_flush(struct irq_work *work)
{
- static raw_spinlock_t read_lock =
- __RAW_SPIN_LOCK_INITIALIZER(read_lock);
struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work);
size_t len, size;
int i, last_i;

- /*
- * The lock has two functions. First, one reader has to flush all
- * available message to make the lockless synchronization with
- * writers easier. Second, we do not want to mix messages from
- * different CPUs. This is especially important when printing
- * a backtrace.
- */
raw_spin_lock(&read_lock);

i = 0;
@@ -220,3 +220,16 @@ void printk_nmi_exit(void)
{
this_cpu_write(printk_func, vprintk_default);
}
+
+void printk_nmi_flush_on_panic(void)
+{
+ if (!IS_ENABLED(CONFIG_PRINTK_NMI_FLUSH_ON_PANIC))
+ return;
+
+ raw_spin_lock(&read_lock);
+ printk_nmi_exit();
+ printk_nmi_irq_ready = 0;
+ raw_spin_unlock(&read_lock);
+
+ printk_nmi_flush();
+}
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 9917f69..fc5e6d4 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1462,6 +1462,15 @@ static void call_console_drivers(int level,
}
}

+static void __zap_locks(void)
+{
+ debug_locks_off();
+ /* If a crash is occurring, make sure we can't deadlock */
+ raw_spin_lock_init(&logbuf_lock);
+ /* And make sure that we print immediately */
+ sema_init(&console_sem, 1);
+}
+
/*
* Zap console related locks when oopsing.
* To leave time for slow consoles to print a full oops,
@@ -1477,11 +1486,7 @@ static void zap_locks(void)

oops_timestamp = jiffies;

- debug_locks_off();
- /* If a crash is occurring, make sure we can't deadlock */
- raw_spin_lock_init(&logbuf_lock);
- /* And make sure that we print immediately */
- sema_init(&console_sem, 1);
+ __zap_locks();
}

int printk_delay_msec __read_mostly;
@@ -2386,15 +2391,15 @@ void console_unblank(void)
*/
void console_flush_on_panic(void)
{
- /*
- * If someone else is holding the console lock, trylock will fail
- * and may_schedule may be set. Ignore and proceed to unlock so
- * that messages are flushed out. As this can be called from any
- * context and we don't want to get preempted while flushing,
- * ensure may_schedule is cleared.
+ __zap_locks();
+
+ /* As this can be called from any context and we don't want
+ * to get preempted while flushing, ensure may_schedule is
+ * cleared.
*/
console_trylock();
console_may_schedule = 0;
+ printk_nmi_flush_on_panic();
console_unlock();
}

--
2.8.0.rc0