[PATCH -v2 01/17] stop_machine: Add function and caller debug info

From: Peter Zijlstra
Date: Mon Oct 05 2020 - 11:09:59 EST


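Crashes in stop-machine are hard to connect to the calling code. Record the
stopper function and the address of its caller in the per-CPU stopper, and
print both from dump_stack_print_info() to help with that.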

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/stop_machine.h | 5 +++++
kernel/stop_machine.c | 23 ++++++++++++++++++++---
lib/dump_stack.c | 2 ++
3 files changed, 27 insertions(+), 3 deletions(-)

--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg);
struct cpu_stop_work {
struct list_head list; /* cpu_stopper->works */
cpu_stop_fn_t fn;
+ unsigned long caller; /* _RET_IP_ recorded where the work is set up; debug info only */
void *arg;
struct cpu_stop_done *done;
};
@@ -36,6 +37,8 @@ void stop_machine_park(int cpu);
void stop_machine_unpark(int cpu);
void stop_machine_yield(const struct cpumask *cpumask);

+extern void print_stop_info(const char *log_lvl, struct task_struct *task);
+
#else /* CONFIG_SMP */

#include <linux/workqueue.h>
@@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(u
return false;
}

+static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { } /* !CONFIG_SMP: no-op stub */
+
#endif /* CONFIG_SMP */

/*
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -42,11 +42,23 @@ struct cpu_stopper {
struct list_head works; /* list of pending works */

struct cpu_stop_work stop_work; /* for stop_cpus */
+ unsigned long caller;
+ cpu_stop_fn_t fn;
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;

+void print_stop_info(const char *log_lvl, struct task_struct *task)
+{
+ /* May run preemptible, so no this_cpu_ptr(); a stopper task cannot migrate, task_cpu() is stable. */
+ struct cpu_stopper *stopper = per_cpu_ptr(&cpu_stopper, task_cpu(task));
+
+ if (task != stopper->thread)
+ return;
+ printk("%sStopper: %pS <- %pS\n", log_lvl, stopper->fn, (void *)stopper->caller);
+}
+
/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static bool stop_cpus_in_progress;
@@ -123,7 +135,7 @@ static bool cpu_stop_queue_work(unsigned
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
struct cpu_stop_done done;
- struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
+ struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done, .caller = _RET_IP_ };

cpu_stop_init_done(&done, 1);
if (!cpu_stop_queue_work(cpu, &work))
@@ -331,7 +343,8 @@ int stop_two_cpus(unsigned int cpu1, uns
work1 = work2 = (struct cpu_stop_work){
.fn = multi_cpu_stop,
.arg = &msdata,
- .done = &done
+ .done = &done,
+ .caller = _RET_IP_,
};

cpu_stop_init_done(&done, 2);
@@ -367,7 +380,7 @@ int stop_two_cpus(unsigned int cpu1, uns
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf)
{
- *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
+ *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, .caller = _RET_IP_, };
return cpu_stop_queue_work(cpu, work_buf);
}

@@ -487,6 +500,8 @@ static void cpu_stopper_thread(unsigned
int ret;

/* cpu stop callbacks must not sleep, make in_atomic() == T */
+ stopper->caller = work->caller;
+ stopper->fn = fn;
preempt_count_inc();
ret = fn(arg);
if (done) {
@@ -495,6 +510,8 @@ static void cpu_stopper_thread(unsigned
cpu_stop_signal_done(done);
}
preempt_count_dec();
+ stopper->fn = NULL;
+ stopper->caller = 0;
WARN_ONCE(preempt_count(),
"cpu_stop: %ps(%p) leaked preempt count\n", fn, arg);
goto repeat;
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -12,6 +12,7 @@
#include <linux/atomic.h>
#include <linux/kexec.h>
#include <linux/utsname.h>
+#include <linux/stop_machine.h>

static char dump_stack_arch_desc_str[128];

@@ -57,6 +58,7 @@ void dump_stack_print_info(const char *l
log_lvl, dump_stack_arch_desc_str);

print_worker_info(log_lvl, current);
+ print_stop_info(log_lvl, current);
}

/**