Re: [patch 59/66] rcu: Convert rcutree to hotplug state machine
From: Paul E. McKenney
Date: Mon Jul 11 2016 - 14:38:34 EST
On Mon, Jul 11, 2016 at 12:29:04PM -0000, Anna-Maria Gleixner wrote:
> From: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
>
> Straightforward conversion to the state machine. Though the question arises
> whether this really needs all these state transitions to work.
>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Reviewed-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
> Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxxxxxxxxxx>
> Signed-off-by: Anna-Maria Gleixner <anna-maria@xxxxxxxxxxxxx>
I believe that this patch breaks !SMP builds, as it has the side effect
of pulling a Tree RCU include file into Tiny RCU builds.
Some questions below, and a related patch at the end. The related patch
provides exact detection of CPUs coming online, and passes light rcutorture
testing.
Thanx, Paul
> ---
> include/linux/cpuhotplug.h | 3 +
> include/linux/rcutree.h | 15 ++++++
> kernel/cpu.c | 15 ++++++
> kernel/rcu/tree.c | 105 ++++++++++++++++++++++-----------------------
> 4 files changed, 85 insertions(+), 53 deletions(-)
>
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -15,12 +15,14 @@ enum cpuhp_state {
> CPUHP_X86_HPET_DEAD,
> CPUHP_X86_APB_DEAD,
> CPUHP_WORKQUEUE_PREP,
> + CPUHP_RCUTREE_PREP,
> CPUHP_POWER_NUMA_PREPARE,
> CPUHP_NOTIFY_PREPARE,
> CPUHP_BRINGUP_CPU,
> CPUHP_AP_IDLE_DEAD,
> CPUHP_AP_OFFLINE,
> CPUHP_AP_SCHED_STARTING,
> + CPUHP_AP_RCUTREE_DYING,
> CPUHP_AP_IRQ_GIC_STARTING,
> CPUHP_AP_IRQ_GICV3_STARTING,
> CPUHP_AP_IRQ_HIP04_STARTING,
> @@ -78,6 +80,7 @@ enum cpuhp_state {
> CPUHP_AP_PERF_ARM_CCI_ONLINE,
> CPUHP_AP_PERF_ARM_CCN_ONLINE,
> CPUHP_AP_WORKQUEUE_ONLINE,
> + CPUHP_AP_RCUTREE_ONLINE,
> CPUHP_AP_NOTIFY_ONLINE,
> CPUHP_AP_ONLINE_DYN,
> CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
The dying-idle state is still covered by direct function call, correct?
(The call to rcu_report_dead() from cpuhp_report_idle_dead().)
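For context, the direct path I have in mind looks roughly like this
(paraphrased from kernel/cpu.c, so take it as a sketch rather than the
exact upstream code):

	/* Sketch of the dying-idle path, paraphrased from kernel/cpu.c. */
	void cpuhp_report_idle_dead(void)
	{
		struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

		BUG_ON(st->state != CPUHP_AP_OFFLINE);
		rcu_report_dead(smp_processor_id());	/* direct call, no hotplug state */
		st->state = CPUHP_AP_IDLE_DEAD;
		/* ... completion is then handed off to an online CPU ... */
	}

So that transition stays outside the state machine, which is what I want
to confirm.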
> --- a/include/linux/rcutree.h
> +++ b/include/linux/rcutree.h
> @@ -111,4 +111,19 @@ bool rcu_is_watching(void);
>
> void rcu_all_qs(void);
>
> +/* RCUtree hotplug events */
> +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
> +int rcutree_prepare_cpu(unsigned int cpu);
> +int rcutree_online_cpu(unsigned int cpu);
> +int rcutree_offline_cpu(unsigned int cpu);
> +int rcutree_dead_cpu(unsigned int cpu);
> +int rcutree_dying_cpu(unsigned int cpu);
> +#else
> +#define rcutree_prepare_cpu NULL
> +#define rcutree_online_cpu NULL
> +#define rcutree_offline_cpu NULL
> +#define rcutree_dead_cpu NULL
> +#define rcutree_dying_cpu NULL
> +#endif
This file is included only in CONFIG_TREE_RCU or CONFIG_PREEMPT_RCU
builds, so you should not need this ifdef.
The only other option is CONFIG_TINY_RCU, for which CONFIG_HOTPLUG_CPU
cannot possibly be set.
> +
> #endif /* __LINUX_RCUTREE_H */
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -23,6 +23,7 @@
> #include <linux/tick.h>
> #include <linux/irq.h>
> #include <linux/smpboot.h>
> +#include <linux/rcutree.h>
Ah, I see... ;-)
I am going to guess that this code was never built for CONFIG_SMP=n...
I would expect a few build errors.
I suggest moving the #ifdef from include/linux/rcutree.h to
include/linux/cpu.h. That way, you avoid including code intended
only for Tree RCU into Tiny RCU builds.
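One way to read that suggestion, just to illustrate the idea (untested,
names as in this patch):

	/* include/linux/cpu.h -- illustrative only, untested */
	#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
	#include <linux/rcutree.h>		/* rcutree_prepare_cpu() and friends */
	#else
	#define rcutree_prepare_cpu	NULL
	#define rcutree_online_cpu	NULL
	#define rcutree_offline_cpu	NULL
	#define rcutree_dead_cpu	NULL
	#define rcutree_dying_cpu	NULL
	#endif

That way kernel/cpu.c (which already pulls in <linux/cpu.h>) would not
need the explicit #include <linux/rcutree.h> below, and Tiny RCU builds
never see the Tree RCU declarations.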
> #include <trace/events/power.h>
> #define CREATE_TRACE_POINTS
> @@ -1183,6 +1184,11 @@ static struct cpuhp_step cpuhp_bp_states
> .startup = workqueue_prepare_cpu,
> .teardown = NULL,
> },
> + [CPUHP_RCUTREE_PREP] = {
> + .name = "RCU-tree prepare",
> + .startup = rcutree_prepare_cpu,
> + .teardown = rcutree_dead_cpu,
> + },
> /*
> * Preparatory and dead notifiers. Will be replaced once the notifiers
> * are converted to states.
> @@ -1235,6 +1241,10 @@ static struct cpuhp_step cpuhp_ap_states
> .startup = sched_cpu_starting,
> .teardown = sched_cpu_dying,
> },
> + [CPUHP_AP_RCUTREE_DYING] = {
> + .startup = NULL,
> + .teardown = rcutree_dying_cpu,
> + },
> /*
> * Low level startup/teardown notifiers. Run with interrupts
> * disabled. Will be removed once the notifiers are converted to
> @@ -1268,6 +1278,11 @@ static struct cpuhp_step cpuhp_ap_states
> .startup = workqueue_online_cpu,
> .teardown = workqueue_offline_cpu,
> },
> + [CPUHP_AP_RCUTREE_ONLINE] = {
> + .name = "RCU-tree online",
> + .startup = rcutree_online_cpu,
> + .teardown = rcutree_offline_cpu,
> + },
>
> /*
> * Online/down_prepare notifiers. Will be removed once the notifiers
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1070,11 +1070,11 @@ EXPORT_SYMBOL_GPL(rcu_is_watching);
> * offline to continue to use RCU for one jiffy after marking itself
> * offline in the cpu_online_mask. This leniency is necessary given the
> * non-atomic nature of the online and offline processing, for example,
> - * the fact that a CPU enters the scheduler after completing the CPU_DYING
> - * notifiers.
> + * the fact that a CPU enters the scheduler after completing the teardown
> + * of the CPU.
> *
> - * This is also why RCU internally marks CPUs online during the
> - * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
> + * This is also why RCU internally marks CPUs online during the
> + * preparation phase and offline after the CPU has been taken down.
> *
> * Disable checking if in an NMI handler because we cannot safely report
> * errors from NMI handlers anyway.
> @@ -4340,12 +4340,58 @@ rcu_init_percpu_data(int cpu, struct rcu
> raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> }
>
> -static void rcu_prepare_cpu(int cpu)
> +int rcutree_prepare_cpu(unsigned int cpu)
> {
> struct rcu_state *rsp;
>
> for_each_rcu_flavor(rsp)
> rcu_init_percpu_data(cpu, rsp);
> +
> + rcu_prepare_kthreads(cpu);
> + rcu_spawn_all_nocb_kthreads(cpu);
> +
> + return 0;
> +}
> +
> +static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
> +{
> + struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
> +
> + rcu_boost_kthread_setaffinity(rdp->mynode, outgoing);
> +}
> +
> +int rcutree_online_cpu(unsigned int cpu)
> +{
> + sync_sched_exp_online_cleanup(cpu);
> + rcutree_affinity_setting(cpu, -1);
> + return 0;
> +}
> +
> +int rcutree_offline_cpu(unsigned int cpu)
> +{
> + rcutree_affinity_setting(cpu, cpu);
> + return 0;
> +}
> +
> +
> +int rcutree_dying_cpu(unsigned int cpu)
> +{
> + struct rcu_state *rsp;
> +
> + for_each_rcu_flavor(rsp)
> + rcu_cleanup_dying_cpu(rsp);
> + return 0;
> +}
> +
> +int rcutree_dead_cpu(unsigned int cpu)
> +{
> + struct rcu_state *rsp;
> +
> + for_each_rcu_flavor(rsp) {
> + rcu_cleanup_dead_cpu(cpu, rsp);
> + do_nocb_deferred_wakeup(per_cpu_ptr(rsp->rda, cpu));
> + }
> + return 0;
> }
>
> #ifdef CONFIG_HOTPLUG_CPU
> @@ -4388,52 +4434,6 @@ void rcu_report_dead(unsigned int cpu)
> }
> #endif
>
> -/*
> - * Handle CPU online/offline notification events.
> - */
> -int rcu_cpu_notify(struct notifier_block *self,
> - unsigned long action, void *hcpu)
> -{
> - long cpu = (long)hcpu;
> - struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
> - struct rcu_node *rnp = rdp->mynode;
> - struct rcu_state *rsp;
> -
> - switch (action) {
> - case CPU_UP_PREPARE:
> - case CPU_UP_PREPARE_FROZEN:
> - rcu_prepare_cpu(cpu);
> - rcu_prepare_kthreads(cpu);
> - rcu_spawn_all_nocb_kthreads(cpu);
> - break;
> - case CPU_ONLINE:
> - case CPU_DOWN_FAILED:
> - sync_sched_exp_online_cleanup(cpu);
> - rcu_boost_kthread_setaffinity(rnp, -1);
> - break;
> - case CPU_DOWN_PREPARE:
> - rcu_boost_kthread_setaffinity(rnp, cpu);
> - break;
> - case CPU_DYING:
> - case CPU_DYING_FROZEN:
> - for_each_rcu_flavor(rsp)
> - rcu_cleanup_dying_cpu(rsp);
> - break;
> - case CPU_DEAD:
> - case CPU_DEAD_FROZEN:
> - case CPU_UP_CANCELED:
> - case CPU_UP_CANCELED_FROZEN:
> - for_each_rcu_flavor(rsp) {
> - rcu_cleanup_dead_cpu(cpu, rsp);
> - do_nocb_deferred_wakeup(per_cpu_ptr(rsp->rda, cpu));
> - }
> - break;
> - default:
> - break;
> - }
> - return NOTIFY_OK;
> -}
> -
> static int rcu_pm_notify(struct notifier_block *self,
> unsigned long action, void *hcpu)
> {
> @@ -4745,10 +4745,9 @@ void __init rcu_init(void)
> * this is called early in boot, before either interrupts
> * or the scheduler are operational.
> */
> - cpu_notifier(rcu_cpu_notify, 0);
> pm_notifier(rcu_pm_notify, 0);
> for_each_online_cpu(cpu)
> - rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
> + rcutree_prepare_cpu(cpu);
> }
>
> #include "tree_plugin.h"
------------------------------------------------------------------------
commit da7095f39456dd0f28fa21697f2f976a61bc6d0a
Author: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
Date: Thu Jun 30 13:58:26 2016 -0700
rcu: Exact CPU-online tracking for RCU
Up to now, RCU has assumed that the CPU-online process makes it from
CPU_UP_PREPARE to set_cpu_online() within one jiffy. Given the recent
rise of virtualized environments, this assumption is very clearly
obsolete.
This commit therefore updates RCU's internal CPU state-tracking
information at notify_cpu_starting() time, thus providing exact
tracking of the CPU state from an RCU perspective.
Note that this means that incoming CPUs must not use RCU read-side
critical sections (other than those of SRCU) until notify_cpu_starting()
time. Note that the CPU_STARTING notifiers -are- allowed to use
RCU read-side critical sections.
If a given architecture or CPU family needs to use RCU read-side critical
sections earlier, the call to rcu_cpu_starting() from notify_cpu_starting()
will need to be architecture-specific, with architectures that opt out
being required to hand-place the call to rcu_cpu_starting() at some point
preceding the call to notify_cpu_starting().
Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 3bc5de08c0b7..0f2d86d1ca6e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -334,6 +334,7 @@ void rcu_sched_qs(void);
void rcu_bh_qs(void);
void rcu_check_callbacks(int user);
void rcu_report_dead(unsigned int cpu);
+void rcu_cpu_starting(unsigned int cpu);
#ifndef CONFIG_TINY_RCU
void rcu_end_inkernel_boot(void);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d948e44c471e..703453233530 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -882,6 +882,7 @@ void notify_cpu_starting(unsigned int cpu)
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
+ rcu_cpu_starting(cpu); /* All CPU_STARTING notifiers can use RCU. */
while (st->state < target) {
struct cpuhp_step *step;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3121242b8579..5e7c1d6a6108 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3793,8 +3793,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
rnp = rdp->mynode;
mask = rdp->grpmask;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
- rnp->qsmaskinitnext |= mask;
- rnp->expmaskinitnext |= mask;
if (!rdp->beenonline)
WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
rdp->beenonline = true; /* We have now been online. */
@@ -3815,6 +3813,32 @@ static void rcu_prepare_cpu(int cpu)
rcu_init_percpu_data(cpu, rsp);
}
+/*
+ * Mark the specified CPU as being online so that subsequent grace periods
+ * (both expedited and normal) will wait on it. Note that this means that
+ * incoming CPUs are not allowed to use RCU read-side critical sections
+ * until this function is called. Failing to observe this restriction
+ * will result in lockdep splats.
+ */
+void rcu_cpu_starting(unsigned int cpu)
+{
+ unsigned long flags;
+ unsigned long mask;
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
+ struct rcu_state *rsp;
+
+ for_each_rcu_flavor(rsp) {
+ rdp = this_cpu_ptr(rsp->rda);
+ rnp = rdp->mynode;
+ mask = rdp->grpmask;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ rnp->qsmaskinitnext |= mask;
+ rnp->expmaskinitnext |= mask;
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ }
+}
+
#ifdef CONFIG_HOTPLUG_CPU
/*
* The CPU is exiting the idle loop into the arch_cpu_idle_dead()
@@ -4211,8 +4235,10 @@ void __init rcu_init(void)
*/
cpu_notifier(rcu_cpu_notify, 0);
pm_notifier(rcu_pm_notify, 0);
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
+ rcu_cpu_starting(cpu);
+ }
}
#include "tree_exp.h"