Re: [RFC][PATCHv5 00/13] printk: introduce printing kernel thread

From: Sergey Senozhatsky
Date: Wed Aug 23 2017 - 04:32:59 EST


On (08/15/17 11:56), Sergey Senozhatsky wrote:
> Sergey Senozhatsky (13):
> printk: move printk_pending out of per-cpu
> printk: introduce printing kernel thread
> printk: add sync printk_emergency API
> printk: add enforce_emergency parameter
> printk: enable printk offloading
> printk: register PM notifier
> printk: register syscore notifier
> printk: set watchdog_thresh as maximum value for atomic_print_limit
> printk: add auto-emergency enforcement mechanism
> printk: force printk_kthread to offload printing
> printk: always offload printing from user-space processes
> printk: do not cond_resched() when we can offload
> printk: move offloading logic to per-cpu


+ one more RFC patch
the patch attempts to move offloading out of potentially unsafe/critical
sections which are not covered by emergency_begin/end; namely, those
loops that explicitly silence the watchdog.

I think, printk and watchdog should know more about each other.

=== 8< === 8< ===

From: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>
Subject: [PATCH] printk: add offloading watchdog API

Introduce printk_offloading watchdog API to control the behaviour
of offloading. Some of the control paths, basically, disable the
soft-lockup watchdog by calling touch_all_softlockup_watchdogs().
One such example could be sysrq-t:

__handle_sysrq()
sysrq_handle_showstate()
show_state()
show_state_filter()
touch_all_softlockup_watchdogs()

This control path deliberately and forcibly silences the watchdog
for various reasons, one of which is the fact that sysrq-t may
be called when the system is in a bad condition and we need to print
backtraces as soon as possible. In this case calling into the
scheduler from printk offloading may be dangerous and in general
should be avoided; besides, we don't need to offload anything as
long as the watchdog is happy.

This patch adds a touch_printk_offloading_watchdog() call to
the watchdog's touch_softlockup_watchdog(), so each time a control
path resets the watchdog it also resets the printk offloading timestamp,
effectively disabling printk offloading.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx>
---
include/linux/console.h | 2 ++
kernel/printk/printk.c | 39 ++++++++++++++++++++++++++++++++++-----
kernel/watchdog.c | 6 +++++-
3 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/include/linux/console.h b/include/linux/console.h
index 8ce29b2381d2..7408a345f4b1 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -191,6 +191,8 @@ extern void printk_emergency_begin(void);
extern void printk_emergency_end(void);
extern int printk_emergency_begin_sync(void);
extern int printk_emergency_end_sync(void);
+extern void touch_printk_offloading_watchdog(void);
+extern void touch_printk_offloading_watchdog_on_cpu(unsigned int cpu);

int mda_console_init(void);
void prom_con_init(void);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 369e0530a84f..a4e3f84ef365 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -476,6 +476,15 @@ module_param_named(atomic_print_limit, atomic_print_limit, uint, 0644);
MODULE_PARM_DESC(atomic_print_limit,
"the number of seconds task is allowed to print messages");

+/*
+ * Current printing task, timestamp when it has started printing
+ * loop and its scheduler stats. We use it to control offloading,
+ * see console_offload_printing() for details.
+ */
+static DEFINE_PER_CPU(struct task_struct, *printing_task);
+static DEFINE_PER_CPU(unsigned long, printing_start_ts);
+static DEFINE_PER_CPU(unsigned long, saved_csw);
+
static inline bool printk_offloading_enabled(void)
{
return atomic_print_limit &&
@@ -568,6 +577,26 @@ static inline unsigned long emergency_timeout(unsigned long now,
return time_after_eq(now, ts + 10 * atomic_print_limit);
}

+/*
+ * Must be called by the watchdog. When control path calls
+ * touch_all_softlockup_watchdogs() or touch_softlockup_watchdog()
+ * to silent the watchdog we need to also reset the printk
+ * offloading counter in order to avoid printk offloading from a
+ * potentially unsafe context.
+ *
+ * preemption must be disabled.
+ */
+void touch_printk_offloading_watchdog(void)
+{
+ this_cpu_write(printing_start_ts, local_clock() >> 30LL);
+}
+
+/* Same as above, but updates`printing_start_ts' on a particular CPU. */
+void touch_printk_offloading_watchdog_on_cpu(unsigned int cpu)
+{
+ per_cpu(printing_start_ts, cpu) = local_clock() >> 30LL;
+}
+
/*
* Under heavy printing load or with a slow serial console (or both)
* console_unlock() can stall CPUs, which can result in soft/hard-lockups,
@@ -579,9 +608,6 @@ static inline unsigned long emergency_timeout(unsigned long now,
*/
static inline bool console_offload_printing(void)
{
- static DEFINE_PER_CPU(struct task_struct, *printing_task);
- static DEFINE_PER_CPU(unsigned long, printing_start_ts);
- static DEFINE_PER_CPU(unsigned long, saved_csw);
unsigned long now = local_clock() >> 30LL; /* seconds */

if (printk_kthread_should_stop())
@@ -602,7 +628,7 @@ static inline bool console_offload_printing(void)

/* A new task - reset the counters. */
if (this_cpu_read(printing_task) != current) {
- this_cpu_write(printing_start_ts, local_clock() >> 30LL);
+ touch_printk_offloading_watchdog();
this_cpu_write(saved_csw, current->nvcsw + current->nivcsw);
this_cpu_write(printing_task, current);
return false;
@@ -628,7 +654,7 @@ static inline bool console_offload_printing(void)
* back to console_unlock(), it will have another full
* `atomic_print_limit' time slice.
*/
- this_cpu_write(printing_start_ts, local_clock() >> 30LL);
+ touch_printk_offloading_watchdog();
return true;
}

@@ -2114,6 +2140,9 @@ EXPORT_SYMBOL_GPL(printk_emergency_end_sync);

static inline bool console_offload_printing(void) { return false; }

+void touch_printk_offloading_watchdog(void) {}
+void touch_printk_offloading_watchdog_on_cpu(unsigned int cpu) {}
+
#endif /* CONFIG_PRINTK */

#ifdef CONFIG_EARLY_PRINTK
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f5d52024f6b7..76c60df4b508 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -24,6 +24,7 @@
#include <linux/workqueue.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
+#include <linux/console.h>

#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
@@ -268,6 +269,7 @@ void touch_softlockup_watchdog_sched(void)

void touch_softlockup_watchdog(void)
{
+ touch_printk_offloading_watchdog();
touch_softlockup_watchdog_sched();
wq_watchdog_touch(raw_smp_processor_id());
}
@@ -282,8 +284,10 @@ void touch_all_softlockup_watchdogs(void)
* do we care if a 0 races with a timestamp?
* all it means is the softlock check starts one cycle later
*/
- for_each_watchdog_cpu(cpu)
+ for_each_watchdog_cpu(cpu) {
per_cpu(watchdog_touch_ts, cpu) = 0;
+ touch_printk_offloading_watchdog_on_cpu(cpu);
+ }
wq_watchdog_touch(-1);
}

--
2.14.1