[PATCH tip/core/rcu 12/40] rcu: Pull rcu_qs_ctr into rcu_dynticks structure

From: Paul E. McKenney
Date: Wed Apr 12 2017 - 13:44:52 EST


The rcu_qs_ctr variable is yet another isolated per-CPU variable,
so this commit pulls it into the pre-existing rcu_dynticks per-CPU
structure.

Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
---
.../RCU/Design/Data-Structures/Data-Structures.html | 12 ++++++++++--
kernel/rcu/tree.c | 15 ++++++---------
kernel/rcu/tree.h | 3 ++-
kernel/rcu/tree_trace.c | 4 +---
4 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index bf7f266e8888..3d0311657533 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -1105,6 +1105,7 @@ Its fields are as follows:
2 int dynticks_nmi_nesting;
3 atomic_t dynticks;
4 int rcu_sched_qs_mask;
+ 5 unsigned long rcu_qs_ctr;
</pre>

<p>The <tt>-&gt;dynticks_nesting</tt> field counts the
@@ -1123,12 +1124,19 @@ CPU's transitions to and from dyntick-idle mode, so that this counter
has an even value when the CPU is in dyntick-idle mode and an odd
value otherwise.

-</p><p>Finally, the <tt>-&gt;rcu_sched_qs_mask</tt> field is used
+</p><p>The <tt>-&gt;rcu_sched_qs_mask</tt> field is used
to record the fact that the RCU core code would really like to
-see a quiescent state from the corresponding CPU.
+see a quiescent state from the corresponding CPU, so much so that
+it is willing to call for heavy-weight dyntick-counter operations.
This flag is checked by RCU's context-switch and <tt>cond_resched()</tt>
code, which provide a momentary idle sojourn in response.

+</p><p>Finally the <tt>-&gt;rcu_qs_ctr</tt> field is used to record
+quiescent states from <tt>cond_resched()</tt>.
+Because <tt>cond_resched()</tt> can execute quite frequently, this
+must be quite lightweight, as in a non-atomic increment of this
+per-CPU field.
+
<table>
<tr><th>&nbsp;</th></tr>
<tr><th align="left">Quick Quiz:</th></tr>
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 315647d4e4cd..be40ebb4f0bf 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -436,9 +436,6 @@ bool rcu_eqs_special_set(int cpu)
return true;
}

-DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
-EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr);
-
/*
* Let the RCU core know that this CPU has gone through the scheduler,
* which is a quiescent state. This is called when the need for a
@@ -552,7 +549,7 @@ void rcu_all_qs(void)
do_nocb_deferred_wakeup(this_cpu_ptr(rsp->rda));
preempt_enable();
}
- this_cpu_inc(rcu_qs_ctr);
+ this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
barrier(); /* Avoid RCU read-side critical sections leaking up. */
}
EXPORT_SYMBOL_GPL(rcu_all_qs);
@@ -1325,7 +1322,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
*/
rnp = rdp->mynode;
if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
- READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_qs_ctr, rdp->cpu) &&
+ READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) {
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
return 1;
@@ -2034,7 +2031,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
need_gp = !!(rnp->qsmask & rdp->grpmask);
rdp->cpu_no_qs.b.norm = need_gp;
- rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
+ rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
rdp->core_needs_qs = need_gp;
zero_cpu_stall_ticks(rdp);
WRITE_ONCE(rdp->gpwrap, false);
@@ -2632,7 +2629,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
* within the current grace period.
*/
rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */
- rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
+ rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
@@ -3630,7 +3627,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
/* Is the RCU core waiting for a quiescent state from this CPU? */
if (rcu_scheduler_fully_active &&
rdp->core_needs_qs && rdp->cpu_no_qs.b.norm &&
- rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) {
+ rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_dynticks.rcu_qs_ctr)) {
rdp->n_rp_core_needs_qs++;
} else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) {
rdp->n_rp_report_qs++;
@@ -3943,7 +3940,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
rdp->completed = rnp->completed;
rdp->cpu_no_qs.b.norm = true;
- rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu);
+ rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);
rdp->core_needs_qs = false;
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e298281984dc..76e4467bc765 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -113,7 +113,8 @@ struct rcu_dynticks {
/* Process level is worth LLONG_MAX/2. */
int dynticks_nmi_nesting; /* Track NMI nesting level. */
atomic_t dynticks; /* Even value for idle, else odd. */
- int rcu_sched_qs_mask; /* GP old, need quiescent state. */
+ int rcu_sched_qs_mask; /* GP old, need heavy quiescent state. */
+ unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
long long dynticks_idle_nesting;
/* irq/process nesting level from idle. */
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 8751a748499a..65b43be38e68 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -45,8 +45,6 @@
#define RCU_TREE_NONCORE
#include "tree.h"

-DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
-
static int r_open(struct inode *inode, struct file *file,
const struct seq_operations *op)
{
@@ -121,7 +119,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
cpu_is_offline(rdp->cpu) ? '!' : ' ',
ulong2long(rdp->completed), ulong2long(rdp->gpnum),
rdp->cpu_no_qs.b.norm,
- rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu),
+ rdp->rcu_qs_ctr_snap == per_cpu(rdp->dynticks->rcu_qs_ctr, rdp->cpu),
rdp->core_needs_qs);
seq_printf(m, " dt=%d/%llx/%d df=%lu",
rcu_dynticks_snap(rdp->dynticks),
--
2.5.2