Re: linux-next: manual merge of the rcu tree with the block tree

From: Paul E. McKenney
Date: Thu Feb 11 2021 - 13:03:34 EST


On Thu, Feb 11, 2021 at 04:48:52PM +1100, Stephen Rothwell wrote:
> Hi all,
>
> Today's linux-next merge of the rcu tree got conflicts in:
>
> include/linux/rcupdate.h
> kernel/rcu/tree.c
> kernel/rcu/tree_plugin.h
>
> between commits:
>
> 3a7b5c87a0b2 ("rcu/nocb: Perform deferred wake up before last idle's need_resched() check")
> e4234f21d2ea ("rcu: Pull deferred rcuog wake up to rcu_eqs_enter() callers")
> 14bbd41d5109 ("entry/kvm: Explicitly flush pending rcuog wakeup before last rescheduling point")

Frederic had me move these out of the section of the -rcu commits for
the v5.12 merge window, saying that they were not yet ready.

Jens, are these needed to prevent failures in the block tree? If so,
there were some commits added late in v5.11 that might also get rid
of your failures. If those v5.11 commits don't help the block tree,
let's figure out what we need to do here... ;-)

> from the block tree and commits:
>
> d97b07818240 ("rcu/nocb: De-offloading CB kthread")
> 254e11efde66 ("rcu/nocb: Re-offload support")

These two are part of my pull request.

> eba362724509 ("rcu: Remove superfluous rdp fetch")

This one has some chance of going in.

> from the rcu tree.
>
> I fixed it up (see below) and can carry the fix as necessary. This
> is now fixed as far as linux-next is concerned, but any non-trivial
> conflicts should be mentioned to your upstream maintainer when your tree
> is submitted for merging. You may also want to consider cooperating
> with the maintainer of the conflicting tree to minimise any particularly
> complex conflicts.

Thank you for calling our attention to this collision!

Thanx, Paul

> --
> Cheers,
> Stephen Rothwell
>
> diff --cc include/linux/rcupdate.h
> index 36c2119de702,fa819f878cb1..000000000000
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@@ -110,10 -112,12 +112,14 @@@ static inline void rcu_user_exit(void)
>
> #ifdef CONFIG_RCU_NOCB_CPU
> void rcu_init_nohz(void);
> +void rcu_nocb_flush_deferred_wakeup(void);
> + int rcu_nocb_cpu_offload(int cpu);
> + int rcu_nocb_cpu_deoffload(int cpu);
> #else /* #ifdef CONFIG_RCU_NOCB_CPU */
> static inline void rcu_init_nohz(void) { }
> +static inline void rcu_nocb_flush_deferred_wakeup(void) { }
> + static inline int rcu_nocb_cpu_offload(int cpu) { return -EINVAL; }
> + static inline int rcu_nocb_cpu_deoffload(int cpu) { return 0; }
> #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
>
> /**
> diff --cc kernel/rcu/tree.c
> index ce17b8477442,c1ae1e52f638..000000000000
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@@ -643,7 -649,7 +649,6 @@@ static noinstr void rcu_eqs_enter(bool
> instrumentation_begin();
> trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
> WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
> - rdp = this_cpu_ptr(&rcu_data);
> - do_nocb_deferred_wakeup(rdp);
> rcu_prepare_for_idle();
> rcu_preempt_deferred_qs(current);
>
> diff --cc kernel/rcu/tree_plugin.h
> index cdc1b7651c03,ba1ae1e4b215..000000000000
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@@ -2186,19 -2341,201 +2346,208 @@@ static void do_nocb_deferred_wakeup_tim
> * This means we do an inexact common-case check. Note that if
> * we miss, ->nocb_timer will eventually clean things up.
> */
> -static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
> +static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
> {
> if (rcu_nocb_need_deferred_wakeup(rdp))
> - do_nocb_deferred_wakeup_common(rdp);
> + return do_nocb_deferred_wakeup_common(rdp);
> + return false;
> +}
> +
> +void rcu_nocb_flush_deferred_wakeup(void)
> +{
> + do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
> }
> +EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup);
>
> + static int rdp_offload_toggle(struct rcu_data *rdp,
> + bool offload, unsigned long flags)
> + __releases(rdp->nocb_lock)
> + {
> + struct rcu_segcblist *cblist = &rdp->cblist;
> + struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
> + bool wake_gp = false;
> +
> + rcu_segcblist_offload(cblist, offload);
> +
> + if (rdp->nocb_cb_sleep)
> + rdp->nocb_cb_sleep = false;
> + rcu_nocb_unlock_irqrestore(rdp, flags);
> +
> + /*
> + * Ignore former value of nocb_cb_sleep and force wake up as it could
> + * have been spuriously set to false already.
> + */
> + swake_up_one(&rdp->nocb_cb_wq);
> +
> + raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
> + if (rdp_gp->nocb_gp_sleep) {
> + rdp_gp->nocb_gp_sleep = false;
> + wake_gp = true;
> + }
> + raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
> +
> + if (wake_gp)
> + wake_up_process(rdp_gp->nocb_gp_kthread);
> +
> + return 0;
> + }
> +
> + static int __rcu_nocb_rdp_deoffload(struct rcu_data *rdp)
> + {
> + struct rcu_segcblist *cblist = &rdp->cblist;
> + unsigned long flags;
> + int ret;
> +
> + pr_info("De-offloading %d\n", rdp->cpu);
> +
> + rcu_nocb_lock_irqsave(rdp, flags);
> + /*
> + * If there is still pending offloaded work, the offline
> + * CPU won't help much handling it.
> + */
> + if (cpu_is_offline(rdp->cpu) && !rcu_segcblist_empty(&rdp->cblist)) {
> + rcu_nocb_unlock_irqrestore(rdp, flags);
> + return -EBUSY;
> + }
> +
> + ret = rdp_offload_toggle(rdp, false, flags);
> + swait_event_exclusive(rdp->nocb_state_wq,
> + !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |
> + SEGCBLIST_KTHREAD_GP));
> + rcu_nocb_lock_irqsave(rdp, flags);
> + /* Make sure nocb timer won't stay around */
> + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_OFF);
> + rcu_nocb_unlock_irqrestore(rdp, flags);
> + del_timer_sync(&rdp->nocb_timer);
> +
> + /*
> + * Flush bypass. While IRQs are disabled and once we set
> + * SEGCBLIST_SOFTIRQ_ONLY, no callback is supposed to be
> + * enqueued on bypass.
> + */
> + rcu_nocb_lock_irqsave(rdp, flags);
> + rcu_nocb_flush_bypass(rdp, NULL, jiffies);
> + rcu_segcblist_set_flags(cblist, SEGCBLIST_SOFTIRQ_ONLY);
> + /*
> + * With SEGCBLIST_SOFTIRQ_ONLY, we can't use
> + * rcu_nocb_unlock_irqrestore() anymore. Theoretically we
> + * could set SEGCBLIST_SOFTIRQ_ONLY with cb unlocked and IRQs
> + * disabled now, but let's be paranoid.
> + */
> + raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
> +
> + return ret;
> + }
> +
> + static long rcu_nocb_rdp_deoffload(void *arg)
> + {
> + struct rcu_data *rdp = arg;
> +
> + WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
> + return __rcu_nocb_rdp_deoffload(rdp);
> + }
> +
> + int rcu_nocb_cpu_deoffload(int cpu)
> + {
> + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
> + int ret = 0;
> +
> + if (rdp == rdp->nocb_gp_rdp) {
> + pr_info("Can't deoffload an rdp GP leader (yet)\n");
> + return -EINVAL;
> + }
> + mutex_lock(&rcu_state.barrier_mutex);
> + cpus_read_lock();
> + if (rcu_rdp_is_offloaded(rdp)) {
> + if (cpu_online(cpu))
> + ret = work_on_cpu(cpu, rcu_nocb_rdp_deoffload, rdp);
> + else
> + ret = __rcu_nocb_rdp_deoffload(rdp);
> + if (!ret)
> + cpumask_clear_cpu(cpu, rcu_nocb_mask);
> + }
> + cpus_read_unlock();
> + mutex_unlock(&rcu_state.barrier_mutex);
> +
> + return ret;
> + }
> + EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);
> +
> + static int __rcu_nocb_rdp_offload(struct rcu_data *rdp)
> + {
> + struct rcu_segcblist *cblist = &rdp->cblist;
> + unsigned long flags;
> + int ret;
> +
> + /*
> + * For now we only support re-offload, ie: the rdp must have been
> + * offloaded on boot first.
> + */
> + if (!rdp->nocb_gp_rdp)
> + return -EINVAL;
> +
> + pr_info("Offloading %d\n", rdp->cpu);
> + /*
> + * Can't use rcu_nocb_lock_irqsave() while we are in
> + * SEGCBLIST_SOFTIRQ_ONLY mode.
> + */
> + raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
> + /* Re-enable nocb timer */
> + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
> + /*
> + * We didn't take the nocb lock while working on the
> + * rdp->cblist in SEGCBLIST_SOFTIRQ_ONLY mode.
> + * Every modification that has been done previously on
> + * rdp->cblist must be visible remotely by the nocb kthreads
> + * upon wake up after reading the cblist flags.
> + *
> + * The layout against nocb_lock enforces that ordering:
> + *
> + * __rcu_nocb_rdp_offload() nocb_cb_wait()/nocb_gp_wait()
> + * ------------------------- ----------------------------
> + * WRITE callbacks rcu_nocb_lock()
> + * rcu_nocb_lock() READ flags
> + * WRITE flags READ callbacks
> + * rcu_nocb_unlock() rcu_nocb_unlock()
> + */
> + ret = rdp_offload_toggle(rdp, true, flags);
> + swait_event_exclusive(rdp->nocb_state_wq,
> + rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
> + rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
> +
> + return ret;
> + }
> +
> + static long rcu_nocb_rdp_offload(void *arg)
> + {
> + struct rcu_data *rdp = arg;
> +
> + WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
> + return __rcu_nocb_rdp_offload(rdp);
> + }
> +
> + int rcu_nocb_cpu_offload(int cpu)
> + {
> + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
> + int ret = 0;
> +
> + mutex_lock(&rcu_state.barrier_mutex);
> + cpus_read_lock();
> + if (!rcu_rdp_is_offloaded(rdp)) {
> + if (cpu_online(cpu))
> + ret = work_on_cpu(cpu, rcu_nocb_rdp_offload, rdp);
> + else
> + ret = __rcu_nocb_rdp_offload(rdp);
> + if (!ret)
> + cpumask_set_cpu(cpu, rcu_nocb_mask);
> + }
> + cpus_read_unlock();
> + mutex_unlock(&rcu_state.barrier_mutex);
> +
> + return ret;
> + }
> + EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
> +
> void __init rcu_init_nohz(void)
> {
> int cpu;