[PATCH v3] rcu: Add per-CPU rcuc task info to RCU CPU stall warnings

From: Zqiang
Date: Mon Jan 24 2022 - 22:16:34 EST


When 'use_softirq' is set to zero, all RCU_SOFTIRQ processing is
moved to the per-CPU rcuc kthreads. If an rcuc kthread is being
starved, quiescent states cannot be reported in time and an RCU
CPU stall warning may be triggered. This commit therefore dumps
the stack of the stalled CPU's rcuc kthread, along with a backtrace
of the CPU it last ran on, to help analyze what is preventing that
kthread from running.

Suggested-by: Ammar Faizi <ammarfaizi2@xxxxxxxxxxx>
Signed-off-by: Zqiang <qiang1.zhang@xxxxxxxxx>
---
v1->v2:
Avoid printing anything when the CPU is offline or idle.
v2->v3:
Make code more compact.
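
For context, the diagnostic added here follows a simple
heartbeat-and-threshold pattern: the rcuc kthread stamps
rcu_data.rcuc_activity with jiffies on each pass through its loop, and
rcu_is_rcuc_kthread_starving() flags starvation once that stamp is more
than 2 * HZ old. Below is a minimal userspace sketch of the same
pattern, assuming POSIX threads and C11 atomics; all identifiers
(worker_activity, thresh_ms, and so on) are illustrative and are not
kernel symbols.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static _Atomic long worker_activity;	/* analogous to rdp->rcuc_activity */

static long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

/* Worker loop: record a heartbeat each pass, as WRITE_ONCE(*j, jiffies) does. */
static void *worker(void *arg)
{
	for (;;) {
		atomic_store(&worker_activity, now_ms());
		usleep(100 * 1000);	/* stand-in for real work */
	}
	return NULL;
}

int main(void)
{
	const long thresh_ms = 2000;	/* stand-in for the patch's 2 * HZ */
	pthread_t tid;

	atomic_store(&worker_activity, now_ms());
	pthread_create(&tid, NULL, worker, NULL);

	/* Monitor: complain when the heartbeat is older than the threshold. */
	for (;;) {
		long idle = now_ms() - atomic_load(&worker_activity);

		if (idle > thresh_ms)
			printf("worker starved for %ld ms\n", idle);
		sleep(1);
	}
	return 0;
}

To exercise the new kernel output itself, one would boot with
rcutree.use_softirq=0 and provoke a stall on a CPU whose rcuc kthread
cannot run (rcutorture's stall_cpu module parameter is one possible
starting point).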

kernel/rcu/tree.c | 3 +++
kernel/rcu/tree.h | 1 +
kernel/rcu/tree_plugin.h | 3 +++
kernel/rcu/tree_stall.h | 35 +++++++++++++++++++++++++++++++++++
4 files changed, 42 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a4c25a6283b0..e3fc31a0f546 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2850,10 +2850,12 @@ static void rcu_cpu_kthread(unsigned int cpu)
{
unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
+ unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity);
int spincnt;

trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
for (spincnt = 0; spincnt < 10; spincnt++) {
+ WRITE_ONCE(*j, jiffies);
local_bh_disable();
*statusp = RCU_KTHREAD_RUNNING;
local_irq_disable();
@@ -2874,6 +2876,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
schedule_timeout_idle(2);
trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
*statusp = RCU_KTHREAD_WAITING;
+ WRITE_ONCE(*j, jiffies);
}

static struct smp_hotplug_thread rcu_cpu_thread_spec = {
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 486fc901bd08..4e0fdebb62e8 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -237,6 +237,7 @@ struct rcu_data {
/* rcuc per-CPU kthread or NULL. */
unsigned int rcu_cpu_kthread_status;
char rcu_cpu_has_work;
+ unsigned long rcuc_activity;

/* 7) Diagnostic data, including RCU CPU stall warnings. */
unsigned int softirq_snap; /* Snapshot of softirq activity. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c5b45c2f68a1..327bbfd79cc6 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -996,12 +996,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
*/
static void rcu_cpu_kthread_setup(unsigned int cpu)
{
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
#ifdef CONFIG_RCU_BOOST
struct sched_param sp;

sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
#endif /* #ifdef CONFIG_RCU_BOOST */
+
+ WRITE_ONCE(rdp->rcuc_activity, jiffies);
}

#ifdef CONFIG_RCU_BOOST
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 21bebf7c9030..0c5d8516516a 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp)
return j > 2 * HZ;
}

+static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp)
+{
+ unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity);
+
+ if (jp)
+ *jp = j;
+ return j > 2 * HZ;
+}
+
/*
* Print out diagnostic information for the specified stalled CPU.
*
@@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu)
falsepositive ? " (false positive?)" : "");
}

+static void rcuc_kthread_dump(struct rcu_data *rdp)
+{
+ int cpu;
+ unsigned long j;
+ struct task_struct *rcuc;
+
+ rcuc = rdp->rcu_cpu_kthread_task;
+ if (!rcuc)
+ return;
+
+ cpu = task_cpu(rcuc);
+ if (cpu_is_offline(cpu) || idle_cpu(cpu))
+ return;
+
+ if (!rcu_is_rcuc_kthread_starving(rdp, &j))
+ return;
+
+ pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j);
+ sched_show_task(rcuc);
+ if (!trigger_single_cpu_backtrace(cpu))
+ dump_cpu_task(cpu);
+}
+
/* Complain about starvation of grace-period kthread. */
static void rcu_check_gp_kthread_starvation(void)
{
@@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps)
rcu_check_gp_kthread_expired_fqs_timer();
rcu_check_gp_kthread_starvation();

+ if (!use_softirq)
+ rcuc_kthread_dump(rdp);
+
rcu_dump_cpu_stacks();

raw_spin_lock_irqsave_rcu_node(rnp, flags);
--
2.25.1