[PATCH v2 7/7] [TEST COMMIT] rcu: detect stuck defer_qs_pending at GP cleanup
From: Joel Fernandes
Date: Tue May 26 2026 - 18:51:23 EST
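Add a debug-only detector for a stuck rdp->defer_qs_pending state, as a
review aid for upstream review of the preceding fix commits. At each
grace-period cleanup, the detector checks every online CPU and WARNs once
if a CPU stays at PENDING, with no PENDING -> IDLE transition counted,
for 5 consecutive checks.

This commit is NOT for merge. Reviewers can enable
CONFIG_RCU_GP_CLEANUP_STALE_CHECK to gain runtime confidence in the
preceding fix commits.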
Signed-off-by: Joel Fernandes <joelagnelf@xxxxxxxxxx>
---
kernel/rcu/Kconfig.debug | 11 ++++++++++
kernel/rcu/tree.c | 47 ++++++++++++++++++++++++++++++++++++++++
kernel/rcu/tree.h | 8 +++++++
3 files changed, 66 insertions(+)
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 35218ba74eb5..5a40c4fe544c 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -98,6 +98,17 @@ config RCU_TORTURE_TEST_LOG_GP
Say Y here if you want grace-period sequence numbers logged.
Say N if you are unsure.
+config RCU_GP_CLEANUP_STALE_CHECK
+ bool "Detect stuck defer_qs_pending state at GP cleanup"
+ depends on RCU_TORTURE_TEST
+ default n
+ help
+ This option adds a per-CPU counter that is incremented on every
+ PENDING -> IDLE transition of rdp->defer_qs_pending, and a detector
+ in rcu_gp_cleanup() that WARNs once if a CPU appears stuck at PENDING.
+
+ Say N if you are unsure.
+
config RCU_REF_SCALE_TEST
tristate "Scalability tests for read-side synchronization (RCU and others)"
depends on DEBUG_KERNEL
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d0816468ffee..1307f3fb48ac 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2146,6 +2146,52 @@ static noinline_for_stack void rcu_gp_fqs_loop(void)
}
}
+#ifdef CONFIG_RCU_GP_CLEANUP_STALE_CHECK
+/*
+ * Threshold of consecutive GPs with rdp->defer_qs_pending stuck at
+ * PENDING and no observed PENDING -> IDLE transition before WARN.
+ */
+#define RCU_DEFER_QS_STUCK_GPS_THRESHOLD 5
+/* Called from rcu_gp_cleanup(): WARN once if a CPU looks stuck at PENDING. */
+static void rcu_gp_cleanup_stale_check(void)
+{
+ int cpu;
+ unsigned long cur_gp_seq = READ_ONCE(rcu_state.gp_seq); /* Before rcu_seq_end(). */
+
+ for_each_online_cpu(cpu) {
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ s64 clears_now;
+ int p_now;
+
+ if (READ_ONCE(rdp->gp_seq) != cur_gp_seq) { /* ->gp_seq != global: restart streak. */
+ rdp->defer_qs_pending_stuck_gps = 0;
+ rdp->defer_qs_pending_clears_snap =
+ atomic64_read(&rdp->defer_qs_pending_clears);
+ continue;
+ }
+
+ clears_now = atomic64_read(&rdp->defer_qs_pending_clears);
+ p_now = READ_ONCE(rdp->defer_qs_pending);
+ /* Not PENDING, or a clear was counted since the snapshot: not stuck. */
+ if (p_now != DEFER_QS_PENDING ||
+ clears_now != rdp->defer_qs_pending_clears_snap) {
+ rdp->defer_qs_pending_stuck_gps = 0;
+ rdp->defer_qs_pending_clears_snap = clears_now;
+ continue;
+ }
+ /* Still PENDING with no clear counted since the previous check. */
+ rdp->defer_qs_pending_stuck_gps++;
+ WARN_ONCE(rdp->defer_qs_pending_stuck_gps >=
+ RCU_DEFER_QS_STUCK_GPS_THRESHOLD,
+ "RCU: defer_qs_pending STUCK on CPU %d for %u GPs (gp_seq=%lu, clears=%lld)\n",
+ cpu, rdp->defer_qs_pending_stuck_gps,
+ cur_gp_seq, clears_now);
+ }
+}
+#else
+static inline void rcu_gp_cleanup_stale_check(void) { } /* No-op when disabled. */
+#endif /* CONFIG_RCU_GP_CLEANUP_STALE_CHECK */
+
/*
* Clean up after the old grace period.
*/
@@ -2220,6 +2266,7 @@ static noinline void rcu_gp_cleanup(void)
/* Declare grace period done, trace first to use old GP number. */
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));
+ rcu_gp_cleanup_stale_check();
rcu_seq_end(&rcu_state.gp_seq);
ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
WRITE_ONCE(rcu_state.gp_state, RCU_GP_IDLE);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 4069132f9d44..29d852bbe218 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -204,6 +204,11 @@ struct rcu_data {
/* period it is aware of. */
struct irq_work defer_qs_iw; /* Obtain later scheduler attention. */
int defer_qs_pending; /* irqwork or softirq pending? */
+#ifdef CONFIG_RCU_GP_CLEANUP_STALE_CHECK
+ atomic64_t defer_qs_pending_clears; /* # of rcu_defer_qs_clear() calls. */
+ s64 defer_qs_pending_clears_snap; /* ->defer_qs_pending_clears at last stale check. */
+ unsigned int defer_qs_pending_stuck_gps; /* Consecutive stale checks that looked stuck. */
+#endif /* CONFIG_RCU_GP_CLEANUP_STALE_CHECK */
struct work_struct strict_work; /* Schedule readers for strict GPs. */
/* 2) batch handling */
@@ -299,6 +304,9 @@ struct rcu_data {
static inline void rcu_defer_qs_clear(struct rcu_data *rdp)
{
WRITE_ONCE(rdp->defer_qs_pending, DEFER_QS_IDLE);
+#ifdef CONFIG_RCU_GP_CLEANUP_STALE_CHECK
+ atomic64_inc(&rdp->defer_qs_pending_clears); /* Compared by rcu_gp_cleanup_stale_check(). */
+#endif /* CONFIG_RCU_GP_CLEANUP_STALE_CHECK */
}
/* Values for nocb_defer_wakeup field in struct rcu_data. */
--
2.34.1