Re: [tip:smp/hotplug] rcu: Make CPU_DYING_IDLE an explicit call
From: Paul E. McKenney
Date: Wed Mar 02 2016 - 15:11:58 EST
On Tue, Mar 01, 2016 at 11:58:59AM -0800, tip-bot for Thomas Gleixner wrote:
> Commit-ID: 27d50c7eeb0f03c3d3ca72aac4d2dd487ca1f3f0
> Gitweb: http://git.kernel.org/tip/27d50c7eeb0f03c3d3ca72aac4d2dd487ca1f3f0
> Author: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> AuthorDate: Fri, 26 Feb 2016 18:43:44 +0000
> Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> CommitDate: Tue, 1 Mar 2016 20:36:58 +0100
>
> rcu: Make CPU_DYING_IDLE an explicit call
>
> Make the RCU CPU_DYING_IDLE callback an explicit function call, so it gets
> invoked at the proper place.
>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: linux-arch@xxxxxxxxxxxxxxx
> Cc: Rik van Riel <riel@xxxxxxxxxx>
> Cc: Rafael Wysocki <rafael.j.wysocki@xxxxxxxxx>
> Cc: "Srivatsa S. Bhat" <srivatsa@xxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Arjan van de Ven <arjan@xxxxxxxxxxxxxxx>
> Cc: Sebastian Siewior <bigeasy@xxxxxxxxxxxxx>
> Cc: Rusty Russell <rusty@xxxxxxxxxxxxxxx>
> Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
> Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
> Cc: Tejun Heo <tj@xxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Paul McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
> Cc: Paul Turner <pjt@xxxxxxxxxx>
> Link: http://lkml.kernel.org/r/20160226182341.870167933@xxxxxxxxxxxxx
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
[ . . . ]
> diff --git a/kernel/cpu.c b/kernel/cpu.c
> index 0e8c07f..ff8059b 100644
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -762,6 +762,7 @@ void cpuhp_report_idle_dead(void)
> 	BUG_ON(st->state != CPUHP_AP_OFFLINE);
> 	st->state = CPUHP_AP_IDLE_DEAD;
> 	complete(&st->done);
Not to be repetitive or anything, but if the outgoing CPU is delayed here,
between the complete() above and the rcu_report_dead() added below, it can
break RCU on a number of architectures. Either the CPU can be killed while
still holding one of RCU's locks, or RCU can wrongly see the CPU as still
being alive. Either outcome can prevent future RCU grace periods from ever
completing, thus OOMing the system.
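
To make the window concrete, here is the patched function with the spot in
question marked up. This is only an annotated copy for illustration; the
declaration of "st" sits outside the quoted hunk and is reconstructed from
context, and the comment is mine, not part of the patch:

void cpuhp_report_idle_dead(void)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	BUG_ON(st->state != CPUHP_AP_OFFLINE);
	st->state = CPUHP_AP_IDLE_DEAD;
	complete(&st->done);
	/*
	 * complete() lets the hotplug control side continue with the
	 * teardown.  If the dying CPU is delayed right here, it can be
	 * taken down before (or while) it reports itself dead to RCU
	 * in the call below.
	 */
	rcu_report_dead(smp_processor_id());
}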
Thanx, Paul
> +	rcu_report_dead(smp_processor_id());
> }
>
> #else
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index e41dd41..85b4134 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -2607,28 +2607,6 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
> }
>
> /*
> - * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
> - * function. We now remove it from the rcu_node tree's ->qsmaskinit
> - * bit masks.
> - */
> -static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
> -{
> -	unsigned long flags;
> -	unsigned long mask;
> -	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
> -	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
> -
> -	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
> -		return;
> -
> -	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
> -	mask = rdp->grpmask;
> -	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
> -	rnp->qsmaskinitnext &= ~mask;
> -	raw_spin_unlock_irqrestore(&rnp->lock, flags);
> -}
> -
> -/*
> * The CPU has been completely removed, and some other CPU is reporting
> * this fact from process context. Do the remainder of the cleanup,
> * including orphaning the outgoing CPU's RCU callbacks, and also
> @@ -4247,6 +4225,43 @@ static void rcu_prepare_cpu(int cpu)
> 		rcu_init_percpu_data(cpu, rsp);
> }
>
> +#ifdef CONFIG_HOTPLUG_CPU
> +/*
> + * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
> + * function. We now remove it from the rcu_node tree's ->qsmaskinit
> + * bit masks.
> + */
> +static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
> +{
> +	unsigned long flags;
> +	unsigned long mask;
> +	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
> +	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
> +
> +	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
> +		return;
> +
> +	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
> +	mask = rdp->grpmask;
> +	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
> +	rnp->qsmaskinitnext &= ~mask;
> +	raw_spin_unlock_irqrestore(&rnp->lock, flags);
> +}
> +
> +void rcu_report_dead(unsigned int cpu)
> +{
> +	struct rcu_state *rsp;
> +
> +	/* QS for any half-done expedited RCU-sched GP. */
> +	preempt_disable();
> +	rcu_report_exp_rdp(&rcu_sched_state,
> +			   this_cpu_ptr(rcu_sched_state.rda), true);
> +	preempt_enable();
> +	for_each_rcu_flavor(rsp)
> +		rcu_cleanup_dying_idle_cpu(cpu, rsp);
> +}
> +#endif
> +
> /*
> * Handle CPU online/offline notification events.
> */
> @@ -4278,17 +4293,6 @@ int rcu_cpu_notify(struct notifier_block *self,
> 		for_each_rcu_flavor(rsp)
> 			rcu_cleanup_dying_cpu(rsp);
> 		break;
> -	case CPU_DYING_IDLE:
> -		/* QS for any half-done expedited RCU-sched GP. */
> -		preempt_disable();
> -		rcu_report_exp_rdp(&rcu_sched_state,
> -				   this_cpu_ptr(rcu_sched_state.rda), true);
> -		preempt_enable();
> -
> -		for_each_rcu_flavor(rsp) {
> -			rcu_cleanup_dying_idle_cpu(cpu, rsp);
> -		}
> -		break;
> 	case CPU_DEAD:
> 	case CPU_DEAD_FROZEN:
> 	case CPU_UP_CANCELED:
> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index 8abbe89..bd12c6c 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -220,8 +220,6 @@ static void cpu_idle_loop(void)
> 			rmb();
> 
> 			if (cpu_is_offline(smp_processor_id())) {
> -				rcu_cpu_notify(NULL, CPU_DYING_IDLE,
> -					       (void *)(long)smp_processor_id());
> 				cpuhp_report_idle_dead();
> 				arch_cpu_idle_dead();
> 			}
>