Re: [RFC][PATCH 12/13] stop_machine: Remove lglock

From: Peter Zijlstra
Date: Wed Jun 24 2015 - 04:33:20 EST


On Tue, Jun 23, 2015 at 07:23:44PM -0700, Paul E. McKenney wrote:
> And here is an untested patch that applies the gist of your approach,
> the series of stop_one_cpu() calls, but without undoing the rest.
> I forged your Signed-off-by, please let me know if that doesn't work
> for you. There are a number of simplifications that can be made, but
> the basic approach gets a good testing first.

So I really do not get the point of the trylock. It doesn't make sense.

Why would you poll the mutex instead of just wait for it and then
recheck if someone did the work while you were waiting for it?

What's wrong with the below?

---
kernel/rcu/tree.c | 100 +++++++++++++++---------------------------------------
kernel/rcu/tree.h | 1 +
2 files changed, 29 insertions(+), 72 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index add042926a66..b39a5672a7ac 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -103,6 +103,7 @@ struct rcu_state sname##_state = { \
.orphan_nxttail = &sname##_state.orphan_nxtlist, \
.orphan_donetail = &sname##_state.orphan_donelist, \
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
+ .expedited_mutex = __MUTEX_INITIALIZER(sname##_state.expedited_mutex), \
.name = RCU_STATE_NAME(sname), \
.abbr = sabbr, \
}
@@ -3304,12 +3305,9 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
*/
void synchronize_sched_expedited(void)
{
- cpumask_var_t cm;
- bool cma = false;
- int cpu;
- long firstsnap, s, snap;
- int trycount = 0;
struct rcu_state *rsp = &rcu_sched_state;
+ long s, snap;
+ int cpu;

/*
* If we are in danger of counter wrap, just do synchronize_sched().
@@ -3332,7 +3330,6 @@ void synchronize_sched_expedited(void)
* full memory barrier.
*/
snap = atomic_long_inc_return(&rsp->expedited_start);
- firstsnap = snap;
if (!try_get_online_cpus()) {
/* CPU hotplug operation in flight, fall back to normal GP. */
wait_rcu_gp(call_rcu_sched);
@@ -3341,83 +3338,40 @@ void synchronize_sched_expedited(void)
}
WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));

- /* Offline CPUs, idle CPUs, and any CPU we run on are quiescent. */
- cma = zalloc_cpumask_var(&cm, GFP_KERNEL);
- if (cma) {
- cpumask_copy(cm, cpu_online_mask);
- cpumask_clear_cpu(raw_smp_processor_id(), cm);
- for_each_cpu(cpu, cm) {
- struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-
- if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
- cpumask_clear_cpu(cpu, cm);
- }
- if (cpumask_weight(cm) == 0)
- goto all_cpus_idle;
- }
-
/*
* Each pass through the following loop attempts to force a
* context switch on each CPU.
*/
- while (try_stop_cpus(cma ? cm : cpu_online_mask,
- synchronize_sched_expedited_cpu_stop,
- NULL) == -EAGAIN) {
- put_online_cpus();
- atomic_long_inc(&rsp->expedited_tryfail);
+ mutex_lock(&rsp->expedited_mutex);

- /* Check to see if someone else did our work for us. */
- s = atomic_long_read(&rsp->expedited_done);
- if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
- /* ensure test happens before caller kfree */
- smp_mb__before_atomic(); /* ^^^ */
- atomic_long_inc(&rsp->expedited_workdone1);
- free_cpumask_var(cm);
- return;
- }
+ /*
+ * Check to see if someone else did our work for us, while we were
+ * waiting for the mutex.
+ */
+ s = atomic_long_read(&rsp->expedited_done);
+ if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
+ /* ensure test happens before caller kfree */
+ smp_mb__before_atomic(); /* ^^^ */
+ atomic_long_inc(&rsp->expedited_workdone1);
+ goto unlock;
+ }

- /* No joy, try again later. Or just synchronize_sched(). */
- if (trycount++ < 10) {
- udelay(trycount * num_online_cpus());
- } else {
- wait_rcu_gp(call_rcu_sched);
- atomic_long_inc(&rsp->expedited_normal);
- free_cpumask_var(cm);
- return;
- }
+ /* Stop each CPU that is online, non-idle, and not us. */
+ for_each_online_cpu(cpu) {
+ struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);

- /* Recheck to see if someone else did our work for us. */
- s = atomic_long_read(&rsp->expedited_done);
- if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
- /* ensure test happens before caller kfree */
- smp_mb__before_atomic(); /* ^^^ */
- atomic_long_inc(&rsp->expedited_workdone2);
- free_cpumask_var(cm);
- return;
- }
+ /* Skip our CPU, */
+ if (raw_smp_processor_id() == cpu)
+ continue;

- /*
- * Refetching sync_sched_expedited_started allows later
- * callers to piggyback on our grace period. We retry
- * after they started, so our grace period works for them,
- * and they started after our first try, so their grace
- * period works for us.
- */
- if (!try_get_online_cpus()) {
- /* CPU hotplug operation in flight, use normal GP. */
- wait_rcu_gp(call_rcu_sched);
- atomic_long_inc(&rsp->expedited_normal);
- free_cpumask_var(cm);
- return;
- }
- snap = atomic_long_read(&rsp->expedited_start);
- smp_mb(); /* ensure read is before try_stop_cpus(). */
+ /* and any idle CPUs. */
+ if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
+ continue;
+
+ stop_one_cpu(cpu, synchronize_sched_expedited_cpu_stop, NULL);
}
atomic_long_inc(&rsp->expedited_stoppedcpus);

-all_cpus_idle:
- free_cpumask_var(cm);
-
/*
* Everyone up to our most recent fetch is covered by our grace
* period. Update the counter, but only if our work is still
@@ -3435,6 +3389,8 @@ void synchronize_sched_expedited(void)
}
} while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
atomic_long_inc(&rsp->expedited_done_exit);
+unlock:
+ mutex_unlock(&rsp->expedited_mutex);

put_online_cpus();
}
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 4adb7ca0bf47..10348c081e8e 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -483,6 +483,7 @@ struct rcu_state {
/* _rcu_barrier(). */
/* End of fields guarded by barrier_mutex. */

+ struct mutex expedited_mutex; /* Serializes expediting. */
atomic_long_t expedited_start; /* Starting ticket. */
atomic_long_t expedited_done; /* Done ticket. */
atomic_long_t expedited_wrap; /* # near-wrap incidents. */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/