Re: [PATCH,RFC] RCU-based detection of stalled CPUs for Classic RCU

From: Paul E. McKenney
Date: Fri Oct 03 2008 - 11:12:43 EST


On Fri, Oct 03, 2008 at 06:12:28PM +0800, Lai Jiangshan wrote:
> I found a magic number in it.

Good catch, Jiangshan!!! See below for responses, and please see the
end of this email for an untested patch.

> Paul E. McKenney wrote:
> > Hello again!
> >
> > This patch adds stalled-CPU detection to Classic RCU. This capability
> > is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR,
> > which defaults disabled. This is a debugging feature, not something
> > that non-kernel-hackers would be expected to care about. This feature
> > can detect looping CPUs in !PREEMPT builds and looping CPUs with
> > preemption disabled in PREEMPT builds. This is essentially a port of
> > this functionality from the treercu patch.
> >
> > This version uses jiffies rather than get_seconds(), which eliminates
> > the spurious boot-time CPU stall warnings seen on some systems with
> > the previous patch.
> >
> > This is still against v2.6.27-rc8 -- I will do a version against tip
> > this evening (Pacific Time) when I get back to the combination of better
> > bandwidth and AC power.
> >
> > Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
> > ---
> >
> > include/linux/rcuclassic.h | 9 ++++
> > kernel/rcuclassic.c | 88 +++++++++++++++++++++++++++++++++++++++++++++
> > lib/Kconfig.debug | 13 ++++++
> > 3 files changed, 110 insertions(+)
> >
> > diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
> > index 4ab8436..cab055b 100644
> > --- a/include/linux/rcuclassic.h
> > +++ b/include/linux/rcuclassic.h
> > @@ -40,6 +40,10 @@
> > #include <linux/cpumask.h>
> > #include <linux/seqlock.h>
> >
> > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> > +#define RCU_SECONDS_TILL_STALL_CHECK 3 * HZ /* for rcp->jiffies_stall */
> > +#define RCU_SECONDS_TILL_STALL_RECHECK 30 * HZ /* for rcp->jiffies_stall */
> > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> >
> > /* Global control variables for rcupdate callback mechanism. */
> > struct rcu_ctrlblk {
> > @@ -52,6 +56,11 @@ struct rcu_ctrlblk {
> > spinlock_t lock ____cacheline_internodealigned_in_smp;
> > cpumask_t cpumask; /* CPUs that need to switch in order */
> > /* for current batch to proceed. */
> > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> > + unsigned long gp_start; /* Time at which GP started in jiffies. */
> > + unsigned long jiffies_stall;
> > + /* Time at which to check for CPU stalls. */
> > +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> > } ____cacheline_internodealigned_in_smp;
> >
> > /* Is batch a before batch b ? */
> > diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
> > index aad93cd..a299876 100644
> > --- a/kernel/rcuclassic.c
> > +++ b/kernel/rcuclassic.c
> > @@ -118,6 +118,87 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
> > }
> > #endif
> >
> > +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
> > +
> > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
> > +{
> > + rcp->gp_start = jiffies;
> > + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
> > +}
> > +
> > +static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
> > +{
> > + int cpu;
> > + long delta;
> > + unsigned long flags;
> > +
> > + /* Only let one CPU complain about others per time interval. */
> > +
> > + spin_lock_irqsave(&rcp->lock, flags);
> > + delta = jiffies - rcp->jiffies_stall;
> > + if (delta < 2 || rcp->cur != rcp->completed) {
>
> Is it (2 * HZ)?
> should it be defined as macro?

It should be "2", though a macro might be good. The reason for "2" is
that it guarantees that the other CPU had a full period to respond, so
it would be "2" regardless of the time units.

> > + spin_unlock_irqrestore(&rcp->lock, flags);
> > + return;
> > + }
> > + rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
> > + spin_unlock_irqrestore(&rcp->lock, flags);
> > +
> > + /* OK, time to rat on our buddy... */
> > +
> > + printk(KERN_ERR "RCU detected CPU stalls:");
> > + for_each_possible_cpu(cpu) {
> > + if (cpu_isset(cpu, rcp->cpumask))
> > + printk(" %d", cpu);
> > + }
> > + printk(" (detected by %d, t=%ld jiffies)\n",
> > + smp_processor_id(), (long)(jiffies - rcp->gp_start));
> > +}
> > +
> > +static void print_cpu_stall(struct rcu_ctrlblk *rcp)
> > +{
> > + unsigned long flags;
> > +
> > + printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
> > + smp_processor_id(), jiffies,
> > + jiffies - rcp->gp_start);
> > + dump_stack();
> > + spin_lock_irqsave(&rcp->lock, flags);
> > + if ((long)(jiffies - rcp->jiffies_stall) >= 0)
> > + rcp->jiffies_stall =
> > + jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
> > + spin_unlock_irqrestore(&rcp->lock, flags);
> > + set_need_resched(); /* kick ourselves to get things going. */
> > +}
> > +
> > +static void check_cpu_stall(struct rcu_ctrlblk *rcp)
> > +{
> > + long delta;
> > +
> > + delta = jiffies - rcp->jiffies_stall;
> > + if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
> > +
> > + /* We haven't checked in, so go dump stack. */
> > + print_cpu_stall(rcp);
> > +
> > + } else if (rcp->cur != rcp->completed && delta >= 2) {
> > +
> > + /* They had two seconds to dump stack, so complain. */
>
> appear here again! and it's inconsistent with comment.

Indeed! The comment is wrong.

> > + print_other_cpu_stall(rcp);
> > + }
> > +}
> > +
> > +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> > +
> > +static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
> > +{
> > +}
> > +
> > +static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> > +{
> > +}
> > +
> > +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
> > +
> > /**
> > * call_rcu - Queue an RCU callback for invocation after a grace period.
> > * @head: structure to be used for queueing the RCU updates.
> > @@ -285,6 +366,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
> > */
> > smp_wmb();
> > rcp->cur++;
> > + record_gp_stall_check_time(rcp);
> >
> > /*
> > * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
> > @@ -468,6 +550,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
> >
> > static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> > {
> > + /* Check for CPU stalls, if enabled. */
> > + check_cpu_stall(rcp);
> > +
> > /* This cpu has pending rcu entries and the grace period
> > * for them has completed.
> > */
> > @@ -558,6 +643,9 @@ void rcu_check_callbacks(int cpu, int user)
> > static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
> > struct rcu_data *rdp)
> > {
> > +#ifdef CONFIG_DEBUG_RCU_STALL
> > + printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
> > +#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
> > memset(rdp, 0, sizeof(*rdp));
> > rdp->curtail = &rdp->curlist;
> > rdp->nxttail = &rdp->nxtlist;
> > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> > index 0b50481..9fee969 100644
> > --- a/lib/Kconfig.debug
> > +++ b/lib/Kconfig.debug
> > @@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
> > Say N here if you want the RCU torture tests to start only
> > after being manually enabled via /proc.
> >
> > +config RCU_CPU_STALL_DETECTOR
> > + bool "Check for stalled CPUs delaying RCU grace periods"
> > + depends on CLASSIC_RCU
> > + default n
> > + help
> > + This option causes RCU to printk information on which
> > + CPUs are delaying the current grace period, but only when
> > + the grace period extends for excessive time periods.
> > +
> > + Say Y if you want RCU to perform such checks.
> > +
> > + Say N if you are unsure.
> > +
> > config KPROBES_SANITY_TEST
> > bool "Kprobes sanity tests"
> > depends on DEBUG_KERNEL

Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
---

include/linux/rcuclassic.h | 4 ++++
kernel/rcuclassic.c | 6 +++---
2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 8388647..69e1929 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -43,6 +43,10 @@
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
#define RCU_SECONDS_TILL_STALL_CHECK (3 * HZ) /* for rcp->jiffies_stall */
#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */
+#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
+ /* to take at least one */
+ /* scheduling clock irq */
+ /* before ratting on them. */
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

/* Global control variables for rcupdate callback mechanism. */
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 0ce0802..8c2d15d 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -136,7 +136,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)

spin_lock_irqsave(&rcp->lock, flags);
delta = jiffies - rcp->seconds_stall;
- if (delta < 2 || rcp->cur != rcp->completed) {
+ if (delta < RCU_STALL_RAT_DELAY || rcp->cur != rcp->completed) {
spin_unlock_irqrestore(&rcp->lock, flags);
return;
}
@@ -180,9 +180,9 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
/* We haven't checked in, so go dump stack. */
print_cpu_stall(rcp);

- } else if (rcp->cur != rcp->completed && delta >= 2) {
+ } else if (rcp->cur != rcp->completed && delta >= RCU_STALL_RAT_DELAY) {

- /* They had two seconds to dump stack, so complain. */
+ /* They had two time units to dump stack, so complain. */
print_other_cpu_stall(rcp);
}
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/