[PATCH tip/core/rcu 08/16] rcu: Remove expedited GP funnel-lock bypass

From: Paul E. McKenney
Date: Tue Apr 12 2016 - 11:31:15 EST


Commit #cdacbe1f91264 ("rcu: Add fastpath bypassing funnel locking")
turns out to be a pessimization at high load because it forces a tree
full of tasks to wait for an expedited grace period that they probably
do not need. This commit therefore removes this optimization.

Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
---
Documentation/RCU/trace.txt | 10 +++++-----
kernel/rcu/tree.c | 19 -------------------
kernel/rcu/tree.h | 1 -
kernel/rcu/tree_trace.c | 7 +++----
4 files changed, 8 insertions(+), 29 deletions(-)

diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index ec6998b1b6d0..00a3a38b375a 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -237,17 +237,17 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of

The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:

-s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
+s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872

These fields are as follows:

o "s" is the sequence number, with an odd number indicating that
an expedited grace period is in progress.

-o "wd0", "wd1", "wd2", and "wd3" are the number of times that an
- attempt to start an expedited grace period found that someone
- else had completed an expedited grace period that satisfies the
- attempted request. "Our work is done."
+o "wd1", "wd2", and "wd3" are the number of times that an attempt
+ to start an expedited grace period found that someone else had
+ completed an expedited grace period that satisfies the attempted
+ request. "Our work is done."

o "n" is number of times that a concurrent CPU-hotplug operation
forced a fallback to a normal grace period.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 524026fd9dd7..62e73e0a929f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3617,25 +3617,6 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
struct rcu_node *rnp1 = NULL;

/*
- * First try directly acquiring the root lock in order to reduce
- * latency in the common case where expedited grace periods are
- * rare. We check mutex_is_locked() to avoid pathological levels of
- * memory contention on ->exp_funnel_mutex in the heavy-load case.
- */
- rnp0 = rcu_get_root(rsp);
- if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
- if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
- trace_rcu_exp_funnel_lock(rsp->name, rnp0->level,
- rnp0->grplo, rnp0->grphi,
- TPS("acq"));
- if (sync_exp_work_done(rsp, rnp0, NULL,
- &rdp->expedited_workdone0, s))
- return NULL;
- return rnp0;
- }
- }
-
- /*
* Each pass through the following loop works its way
* up the rcu_node tree, returning if others have done the
* work or otherwise falls through holding the root rnp's
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index df668c0f9e64..ac9a7b0c36ae 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -388,7 +388,6 @@ struct rcu_data {
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
struct mutex exp_funnel_mutex;
- atomic_long_t expedited_workdone0; /* # done by others #0. */
atomic_long_t expedited_workdone1; /* # done by others #1. */
atomic_long_t expedited_workdone2; /* # done by others #2. */
atomic_long_t expedited_workdone3; /* # done by others #3. */
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 1088e64f01ad..d149c412a4e5 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,17 +185,16 @@ static int show_rcuexp(struct seq_file *m, void *v)
int cpu;
struct rcu_state *rsp = (struct rcu_state *)m->private;
struct rcu_data *rdp;
- unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+ unsigned long s1 = 0, s2 = 0, s3 = 0;

for_each_possible_cpu(cpu) {
rdp = per_cpu_ptr(rsp->rda, cpu);
- s0 += atomic_long_read(&rdp->expedited_workdone0);
s1 += atomic_long_read(&rdp->expedited_workdone1);
s2 += atomic_long_read(&rdp->expedited_workdone2);
s3 += atomic_long_read(&rdp->expedited_workdone3);
}
- seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
- rsp->expedited_sequence, s0, s1, s2, s3,
+ seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+ rsp->expedited_sequence, s1, s2, s3,
atomic_long_read(&rsp->expedited_normal),
atomic_read(&rsp->expedited_need_qs),
rsp->expedited_sequence / 2);
--
2.5.2