Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree

From: Paul E. McKenney
Date: Sun Mar 24 2019 - 19:34:37 EST


On Fri, Mar 22, 2019 at 05:25:19PM -0700, Paul E. McKenney wrote:
> On Fri, Mar 22, 2019 at 07:48:19PM -0400, Joel Fernandes wrote:
> > On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > > Running RCU out of softirq is a problem for some workloads that would
> > > like to manage RCU core processing independently of other softirq
> > > work, for example, setting kthread priority. This commit therefore
> > > introduces the `rcunosoftirq' option which moves the RCU core work
> > > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > > The SCHED_OTHER approach avoids the scalability problems that appeared
> > > with the earlier attempt to move RCU core processing from softirq
> > > to kthreads. That said, kernels built with RCU_BOOST=y will run the
> > > rcuc kthreads at the RCU-boosting priority.
> > [snip]
> > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > index 0f31b79eb6761..05a1e42fdaf10 100644
> > > --- a/kernel/rcu/tree.c
> > > +++ b/kernel/rcu/tree.c
> > > @@ -51,6 +51,12 @@
> > > #include <linux/tick.h>
> > > #include <linux/sysrq.h>
> > > #include <linux/kprobes.h>
> > > +#include <linux/gfp.h>
> > > +#include <linux/oom.h>
> > > +#include <linux/smpboot.h>
> > > +#include <linux/jiffies.h>
> > > +#include <linux/sched/isolation.h>
> > > +#include "../time/tick-internal.h"
> > >
> > > #include "tree.h"
> > > #include "rcu.h"
> > > @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
> > > /* Dump rcu_node combining tree at boot to verify correct setup. */
> > > static bool dump_tree;
> > > module_param(dump_tree, bool, 0444);
> > > +/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
> > > +static bool use_softirq = 1;
> > > +module_param(use_softirq, bool, 0444);
> > > /* Control rcu_node-tree auto-balancing at boot time. */
> > > static bool rcu_fanout_exact;
> > > module_param(rcu_fanout_exact, bool, 0444);
> > > @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
> > > EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
> > >
> > > /* Perform RCU core processing work for the current CPU. */
> > > -static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > +static __latent_entropy void rcu_core(void)
> > > {
> > > unsigned long flags;
> > > struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> > > @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > trace_rcu_utilization(TPS("End RCU core"));
> > > }
> > >
> > > +static void rcu_core_si(struct softirq_action *h)
> > > +{
> > > + rcu_core();
> > > +}
> > > +
> > > +static void rcu_wake_cond(struct task_struct *t, int status)
> > > +{
> > > + /*
> > > + * If the thread is yielding, only wake it when this
> > > + * is invoked from idle
> > > + */
> > > + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> > > + wake_up_process(t);
> > > +}
> > > +
> > > +static void invoke_rcu_core_kthread(void)
> > > +{
> > > + struct task_struct *t;
> > > + unsigned long flags;
> > > +
> > > + local_irq_save(flags);
> > > + __this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > > + t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > > + if (t != NULL && t != current)
> > > + rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > > + local_irq_restore(flags);
> > > +}
> > > +
> > > /*
> > > * Schedule RCU callback invocation. If the running implementation of RCU
> > > * does not support RCU priority boosting, just do a direct call, otherwise
> > > @@ -2306,19 +2343,95 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> > > {
> > > if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> > > return;
> > > - if (likely(!rcu_state.boost)) {
> > > - rcu_do_batch(rdp);
> > > - return;
> > > - }
> > > - invoke_rcu_callbacks_kthread();
> > > + if (rcu_state.boost || !use_softirq)
> > > + invoke_rcu_core_kthread();
> > > + rcu_do_batch(rdp);
> >
> > Shouldn't there be an else before the rcu_do_batch? If we are waking up the
> > rcuc thread, then that will do the rcu_do_batch when it runs, right?
> >
> > Something like:
> > if (rcu_state.boost || !use_softirq)
> > invoke_rcu_core_kthread();
> > else
> > rcu_do_batch(rdp);
> >
> > The previous code similarly had a return; there, too.
>
> I believe that you are correct, so I will give it a shot. Good eyes!

Yet rcutorture disagrees. Actually, if we are using rcuc kthreads, this
is only ever invoked from within that thread, so the only check we need is
for the scheduler being operational. I am therefore trying this one out.

Thoughts?

Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 76d6c0902f66..8d6ebc0944ec 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2333,18 +2333,16 @@ static void invoke_rcu_core_kthread(void)
}

/*
- * Schedule RCU callback invocation. If the running implementation of RCU
- * does not support RCU priority boosting, just do a direct call, otherwise
- * wake up the per-CPU kernel kthread. Note that because we are running
- * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
- * cannot disappear out from under us.
+ * Do RCU callback invocation. Note that if we are running !use_softirq,
+ * we are already in the rcuc kthread. If callbacks are offloaded, then
+ * ->cblist is always empty, so we don't get here. Therefore, we only
+ * ever need to check for the scheduler being operational (some callbacks
+ * do wakeups, so we do need the scheduler).
*/
static void invoke_rcu_callbacks(struct rcu_data *rdp)
{
if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
return;
- if (rcu_state.boost || !use_softirq)
- invoke_rcu_core_kthread();
rcu_do_batch(rdp);
}
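
For reference, a much-simplified sketch of the !use_softirq path that the
comment above describes. The rcu_cpu_kthread() name and the shape of its
body are inferred from the smpboot.h include and from the
rcu_cpu_has_work/rcu_cpu_kthread_task fields in the hunks quoted earlier;
this is not the exact code in the patch:

	/* Per-CPU rcuc kthread work function (simplified sketch). */
	static void rcu_cpu_kthread(unsigned int cpu)
	{
		local_bh_disable();
		if (__this_cpu_read(rcu_data.rcu_cpu_has_work)) {
			__this_cpu_write(rcu_data.rcu_cpu_has_work, 0);
			/* rcu_core() -> invoke_rcu_callbacks() -> rcu_do_batch() */
			rcu_core();
		}
		local_bh_enable();
	}

Since invoke_rcu_callbacks() is reached only from rcu_core(), and rcu_core()
runs either in RCU_SOFTIRQ (use_softirq) or in the rcuc kthread itself
(!use_softirq), there is no need to wake the kthread from here. The
rcu_scheduler_fully_active check is all that remains because rcu_do_batch()
can invoke callbacks that do wakeups.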