[PATCH tip/core/rcu 22/22] rcu: Add diagnostics for offline CPUs failing to report QS

From: Paul E. McKenney
Date: Tue Jun 26 2018 - 13:11:06 EST


CPUs are expected to report quiescent states when coming online and
when going offline, and grace-period initialization is supposed to
handle any race conditions where a CPU's ->qsmask bit is set just after
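it goes offline. This commit adds diagnostics for the case where an
offline CPU nevertheless has a grace period waiting on it. If
rcu_implicit_dynticks_qs() finds that the CPU is not marked online in
its rcu_node structure and more than one second (HZ jiffies) has passed
since the grace period started, it issues a WARN_ON(), prints the
->qsmask, ->qsmaskinit, ->qsmaskinitnext, and ->rcu_gp_init_mask fields
of that rcu_node structure and each of its ancestors, prints the CPU's
recorded online/offline grace-period state, and then returns 1 to break
things loose. To support this, a new ->rcu_gp_init_mask field in the
rcu_node structure records the mask of now-offline CPUs computed by
rcu_gp_init().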

Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
---
kernel/rcu/tree.c | 22 ++++++++++++++++++++++
kernel/rcu/tree.h | 1 +
2 files changed, 23 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9e83743ca9d9..4b0aee4f9318 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1188,6 +1188,27 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
smp_store_release(ruqp, true);
}

+ /* If waiting too long on an offline CPU, complain. */
+ if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp)) &&
+ time_after(jiffies, rdp->rsp->gp_start + HZ)) {
+ bool onl;
+ struct rcu_node *rnp1;
+
+ WARN_ON(1); /* Offline CPUs are supposed to report QS! */
+ pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
+ __func__, rnp->grplo, rnp->grphi, rnp->level,
+ (long)rnp->gp_seq, (long)rnp->completedqs);
+ for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
+ pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n",
+ __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask);
+ onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
+ pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n",
+ __func__, rdp->cpu, ".o"[onl],
+ (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
+ (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
+ return 1; /* Break things loose after complaining. */
+ }
+
/*
* A CPU running for an extended time within the kernel can
* delay RCU grace periods. When the CPU is in NO_HZ_FULL mode,
@@ -1967,6 +1988,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
rnp->grphi, rnp->qsmask);
/* Quiescent states for tasks on any now-offline CPUs. */
mask = rnp->qsmask & ~rnp->qsmaskinitnext;
+ rnp->rcu_gp_init_mask = mask;
if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp))
rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
else
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 8077aff7ab40..d51e6edc8e83 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -90,6 +90,7 @@ struct rcu_node {
/* an rcu_data structure, otherwise, each */
/* bit corresponds to a child rcu_node */
/* structure. */
+ unsigned long rcu_gp_init_mask; /* Mask of offline CPUs at GP init. */
unsigned long qsmaskinit;
/* Per-GP initial value for qsmask. */
/* Initialized from ->qsmaskinitnext at the */
--
2.17.1