Re: [PATCH v5 tip/core/rcu 12/16] rcu: Make TASKS_RCU handle nohz_full= CPUs

From: Paul E. McKenney
Date: Thu Aug 14 2014 - 19:16:30 EST


On Thu, Aug 14, 2014 at 06:55:35PM -0400, Pranith Kumar wrote:
> On Mon, Aug 11, 2014 at 6:49 PM, Paul E. McKenney
> <paulmck@xxxxxxxxxxxxxxxxxx> wrote:
> > From: "Paul E. McKenney" <paulmck@xxxxxxxxxxxxxxxxxx>
> >
> > Currently TASKS_RCU would ignore a CPU running a task in nohz_full=
> > usermode execution. There would be neither a context switch nor a
> > scheduling-clock interrupt to tell TASKS_RCU that the task in question
> > had passed through a quiescent state. The grace period would therefore
> > extend indefinitely. This commit therefore makes RCU's dyntick-idle
> > subsystem record the task_struct structure of the task that is running
> > in dyntick-idle mode on each CPU. The TASKS_RCU grace period can
> > then access this information and record a quiescent state on
> > behalf of any CPU running in dyntick-idle usermode.
> >
> > Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
> > ---
> > include/linux/init_task.h | 3 ++-
> > include/linux/sched.h | 2 ++
> > kernel/rcu/tree.c | 2 ++
> > kernel/rcu/tree.h | 2 ++
> > kernel/rcu/tree_plugin.h | 16 ++++++++++++++++
> > kernel/rcu/update.c | 4 +++-
> > 6 files changed, 27 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/linux/init_task.h b/include/linux/init_task.h
> > index 78715ea7c30c..642828009324 100644
> > --- a/include/linux/init_task.h
> > +++ b/include/linux/init_task.h
> > @@ -128,7 +128,8 @@ extern struct group_info init_groups;
> > #define INIT_TASK_RCU_TASKS(tsk) \
> > .rcu_tasks_holdout = false, \
> > .rcu_tasks_holdout_list = \
> > - LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list),
> > + LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), \
> > + .rcu_tasks_idle_cpu = -1,
> > #else
> > #define INIT_TASK_RCU_TASKS(tsk)
> > #endif
> > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > index 3cf124389ec7..5fa041f7a034 100644
> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -1277,6 +1277,7 @@ struct task_struct {
> > unsigned long rcu_tasks_nvcsw;
> > int rcu_tasks_holdout;
> > struct list_head rcu_tasks_holdout_list;
> > + int rcu_tasks_idle_cpu;
> > #endif /* #ifdef CONFIG_TASKS_RCU */
> >
> > #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
> > @@ -2021,6 +2022,7 @@ static inline void rcu_copy_process(struct task_struct *p)
> > #ifdef CONFIG_TASKS_RCU
> > p->rcu_tasks_holdout = false;
> > INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
> > + p->rcu_tasks_idle_cpu = -1;
> > #endif /* #ifdef CONFIG_TASKS_RCU */
> > }
> >
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 645a33efc0d4..0d9ee1e4f446 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -526,6 +526,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
> > atomic_inc(&rdtp->dynticks);
> > smp_mb__after_atomic(); /* Force ordering with next sojourn. */
> > WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
> > + rcu_dynticks_task_enter();
> >
> > /*
> > * It is illegal to enter an extended quiescent state while
> > @@ -642,6 +643,7 @@ void rcu_irq_exit(void)
> > static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
> > int user)
> > {
> > + rcu_dynticks_task_exit();
> > smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */
> > atomic_inc(&rdtp->dynticks);
> > /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
> > diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> > index 0f69a79c5b7d..37ff593b7725 100644
> > --- a/kernel/rcu/tree.h
> > +++ b/kernel/rcu/tree.h
> > @@ -579,6 +579,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
> > static void rcu_bind_gp_kthread(void);
> > static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
> > static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
> > +static void rcu_dynticks_task_enter(void);
> > +static void rcu_dynticks_task_exit(void);
> >
> > #endif /* #ifndef RCU_TREE_NONCORE */
> >
> > diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> > index a86a363ea453..0d8ef5cb1976 100644
> > --- a/kernel/rcu/tree_plugin.h
> > +++ b/kernel/rcu/tree_plugin.h
> > @@ -2852,3 +2852,19 @@ static void rcu_bind_gp_kthread(void)
> > set_cpus_allowed_ptr(current, cpumask_of(cpu));
> > #endif /* #ifdef CONFIG_NO_HZ_FULL */
> > }
> > +
> > +/* Record the current task on dyntick-idle entry. */
> > +static void rcu_dynticks_task_enter(void)
> > +{
> > +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
> > + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
> > +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
>
> Shouldn't we check that the cpu is actually a nohz_full cpu, like follows:
>
> static void rcu_dynticks_task_enter(void)
> {
> #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
> - ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
> + if (tick_nohz_full_cpu(smp_processor_id())
> + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
> #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
> }

No need. We put any required idle-task checks on the
rcu_tasks_kthread() slow path, not on this fast path. Keep in mind
that the tick_nohz_full_cpu() check is not necessarily cheaper than just
doing the store.

> > +}
> > +
> > +/* Record no current task on dyntick-idle exit. */
> > +static void rcu_dynticks_task_exit(void)
> > +{
> > +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
> > + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
> > +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
> > +}
> > diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> > index d997163c7e92..a4140f25cf1a 100644
> > --- a/kernel/rcu/update.c
> > +++ b/kernel/rcu/update.c
> > @@ -466,7 +466,9 @@ static void check_holdout_task(struct task_struct *t,
> > {
> > if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
> > t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
> > - !ACCESS_ONCE(t->on_rq)) {
> > + !ACCESS_ONCE(t->on_rq) ||
> > + (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
> > + !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
>
> rcu_tasks_idle_cpu will be -1 in CONFIG_NO_HZ_FULL is not enabled. Why
> are you checking both here?

If CONFIG_NO_HZ_FULL is not enabled, the remainder of the condition
is dead code. If CONFIG_NO_HZ_FULL -is- enabled, and if idle tasks
got on the list somehow, this allows the stall warning code to
complain about this.

Thanx, Paul

> > ACCESS_ONCE(t->rcu_tasks_holdout) = 0;
> > list_del_rcu(&t->rcu_tasks_holdout_list);
> > put_task_struct(t);
> > --
> > 1.8.1.5
> >
>
>
>
> --
> Pranith
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/