Re: Current mainline git (24e700e291d52bd2) hangs when building e.g. perf

From: Peter Zijlstra
Date: Tue Sep 05 2017 - 04:54:23 EST


On Tue, Sep 05, 2017 at 09:27:38AM +0200, Markus Trippelsdorf wrote:
> Current mainline git (24e700e291d52bd2) hangs when building software
> concurrently (for example perf).
> The issue is not 100% reproducible (sometimes building perf succeeds),
> so bisecting will not work.

Sadly I cannot reproduce, I had:

while :; do make clean; make; done

running on tools/perf for a while, and now have:

while :; do make O=defconfig-build clean; make O=defconfig-build -j80; done

running, all smooth sailing, although there's the hope that the moment I
hit send on this email the box comes unstuck.

> Magic SysRq key doesn't work and there is nothing in the logs.
> Enabling CONFIG_PROVE_LOCKING makes the issue go away.

SysRq not working is suspicious.. and I take it the NMI watchdog also
isn't firing?

> Any ideas on how to debug this further?

So you have a (real) serial line on that box?

Could you try something like:

debug ignore_loglevel sysrq_always_enabled earlyprintk=serial,ttyS0,115200 force_early_printk

with the below patch applied? That always gives me the most reliable
output.

---
kernel/printk/printk.c | 119 +++++++++++++++++++++++++++++++++++--------------
1 file changed, 86 insertions(+), 33 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index fc47863f629c..b17099fbc7ce 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -365,6 +365,75 @@ __packed __aligned(4)
#endif
;

+#ifdef CONFIG_EARLY_PRINTK
+struct console *early_console;
+
+static bool __read_mostly force_early_printk;
+
+static int __init force_early_printk_setup(char *str)
+{
+ force_early_printk = true;
+ return 0;
+}
+early_param("force_early_printk", force_early_printk_setup);
+
+static int early_printk_cpu = -1;
+
+static int early_vprintk(const char *fmt, va_list args)
+{
+ int n, cpu, old;
+ char buf[512];
+
+ cpu = get_cpu();
+ /*
+ * Test-and-Set inter-cpu spinlock with recursion.
+ */
+ for (;;) {
+ /*
+ * c-cas to avoid the exclusive bouncing on spin.
+ * Depends on the memory barrier implied by cmpxchg
+ * for ACQUIRE semantics.
+ */
+ old = READ_ONCE(early_printk_cpu);
+ if (old == -1) {
+ old = cmpxchg(&early_printk_cpu, -1, cpu);
+ if (old == -1)
+ break;
+ }
+ /*
+ * Allow recursion for interrupts and the like.
+ */
+ if (old == cpu)
+ break;
+
+ cpu_relax();
+ }
+
+ n = vscnprintf(buf, sizeof(buf), fmt, args);
+ early_console->write(early_console, buf, n);
+
+ /*
+ * Unlock -- in case @old == @cpu, this is a no-op.
+ */
+ smp_store_release(&early_printk_cpu, old);
+ put_cpu();
+
+ return n;
+}
+
+asmlinkage __visible void early_printk(const char *fmt, ...)
+{
+ va_list ap;
+
+ if (!early_console)
+ return;
+
+ va_start(ap, fmt);
+ early_vprintk(fmt, ap);
+ va_end(ap);
+}
+#endif
+
/*
* The logbuf_lock protects kmsg buffer, indices, counters. This can be taken
* within the scheduler's rq lock. It must be released before calling
@@ -1704,6 +1773,16 @@ asmlinkage int vprintk_emit(int facility, int level,
int printed_len = 0;
bool in_sched = false;

+#ifdef CONFIG_KGDB_KDB
+ if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0))
+ return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
+#endif
+
+#ifdef CONFIG_EARLY_PRINTK
+ if (force_early_printk && early_console)
+ return early_vprintk(fmt, args);
+#endif
+
if (level == LOGLEVEL_SCHED) {
level = LOGLEVEL_DEFAULT;
in_sched = true;
@@ -1796,18 +1875,7 @@ EXPORT_SYMBOL(printk_emit);

int vprintk_default(const char *fmt, va_list args)
{
- int r;
-
-#ifdef CONFIG_KGDB_KDB
- /* Allow to pass printk() to kdb but avoid a recursion. */
- if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) {
- r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
- return r;
- }
-#endif
- r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
-
- return r;
+ return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
}
EXPORT_SYMBOL_GPL(vprintk_default);

@@ -1838,7 +1906,12 @@ asmlinkage __visible int printk(const char *fmt, ...)
int r;

va_start(args, fmt);
- r = vprintk_func(fmt, args);
+#ifdef CONFIG_EARLY_PRINTK
+ if (force_early_printk && early_console)
+ r = vprintk_default(fmt, args);
+ else
+#endif
+ r = vprintk_func(fmt, args);
va_end(args);

return r;
@@ -1875,26 +1948,6 @@ static bool suppress_message_printing(int level) { return false; }

#endif /* CONFIG_PRINTK */

-#ifdef CONFIG_EARLY_PRINTK
-struct console *early_console;
-
-asmlinkage __visible void early_printk(const char *fmt, ...)
-{
- va_list ap;
- char buf[512];
- int n;
-
- if (!early_console)
- return;
-
- va_start(ap, fmt);
- n = vscnprintf(buf, sizeof(buf), fmt, ap);
- va_end(ap);
-
- early_console->write(early_console, buf, n);
-}
-#endif
-
static int __add_preferred_console(char *name, int idx, char *options,
char *brl_options)
{