Re: [PATCH v5 tip/core/rcu 12/16] rcu: Make TASKS_RCU handle nohz_full= CPUs

From: Pranith Kumar
Date: Thu Aug 14 2014 - 18:56:13 EST


On Mon, Aug 11, 2014 at 6:49 PM, Paul E. McKenney
<paulmck@xxxxxxxxxxxxxxxxxx> wrote:
> From: "Paul E. McKenney" <paulmck@xxxxxxxxxxxxxxxxxx>
>
> Currently TASKS_RCU would ignore a CPU running a task in nohz_full=
> usermode execution. There would be neither a context switch nor a
> scheduling-clock interrupt to tell TASKS_RCU that the task in question
> had passed through a quiescent state. The grace period would therefore
> extend indefinitely. This commit therefore makes RCU's dyntick-idle
> subsystem record the task_struct structure of the task that is running
> in dyntick-idle mode on each CPU. The TASKS_RCU grace period can
> then access this information and record a quiescent state on
> behalf of any CPU running in dyntick-idle usermode.
>
> Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
> ---
> include/linux/init_task.h | 3 ++-
> include/linux/sched.h | 2 ++
> kernel/rcu/tree.c | 2 ++
> kernel/rcu/tree.h | 2 ++
> kernel/rcu/tree_plugin.h | 16 ++++++++++++++++
> kernel/rcu/update.c | 4 +++-
> 6 files changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/init_task.h b/include/linux/init_task.h
> index 78715ea7c30c..642828009324 100644
> --- a/include/linux/init_task.h
> +++ b/include/linux/init_task.h
> @@ -128,7 +128,8 @@ extern struct group_info init_groups;
> #define INIT_TASK_RCU_TASKS(tsk) \
> .rcu_tasks_holdout = false, \
> .rcu_tasks_holdout_list = \
> - LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list),
> + LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), \
> + .rcu_tasks_idle_cpu = -1,
> #else
> #define INIT_TASK_RCU_TASKS(tsk)
> #endif
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 3cf124389ec7..5fa041f7a034 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1277,6 +1277,7 @@ struct task_struct {
> unsigned long rcu_tasks_nvcsw;
> int rcu_tasks_holdout;
> struct list_head rcu_tasks_holdout_list;
> + int rcu_tasks_idle_cpu;
> #endif /* #ifdef CONFIG_TASKS_RCU */
>
> #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
> @@ -2021,6 +2022,7 @@ static inline void rcu_copy_process(struct task_struct *p)
> #ifdef CONFIG_TASKS_RCU
> p->rcu_tasks_holdout = false;
> INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
> + p->rcu_tasks_idle_cpu = -1;
> #endif /* #ifdef CONFIG_TASKS_RCU */
> }
>
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 645a33efc0d4..0d9ee1e4f446 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -526,6 +526,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
> atomic_inc(&rdtp->dynticks);
> smp_mb__after_atomic(); /* Force ordering with next sojourn. */
> WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
> + rcu_dynticks_task_enter();
>
> /*
> * It is illegal to enter an extended quiescent state while
> @@ -642,6 +643,7 @@ void rcu_irq_exit(void)
> static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
> int user)
> {
> + rcu_dynticks_task_exit();
> smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */
> atomic_inc(&rdtp->dynticks);
> /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index 0f69a79c5b7d..37ff593b7725 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -579,6 +579,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
> static void rcu_bind_gp_kthread(void);
> static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
> static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
> +static void rcu_dynticks_task_enter(void);
> +static void rcu_dynticks_task_exit(void);
>
> #endif /* #ifndef RCU_TREE_NONCORE */
>
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index a86a363ea453..0d8ef5cb1976 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -2852,3 +2852,19 @@ static void rcu_bind_gp_kthread(void)
> set_cpus_allowed_ptr(current, cpumask_of(cpu));
> #endif /* #ifdef CONFIG_NO_HZ_FULL */
> }
> +
> +/* Record the current task on dyntick-idle entry. */
> +static void rcu_dynticks_task_enter(void)
> +{
> +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
> + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
> +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */

Shouldn't we check that the CPU is actually a nohz_full CPU, as follows:

static void rcu_dynticks_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
- ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
+ if (tick_nohz_full_cpu(smp_processor_id()))
+ ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

> +}
> +
> +/* Record no current task on dyntick-idle exit. */
> +static void rcu_dynticks_task_exit(void)
> +{
> +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
> + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
> +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
> +}
> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> index d997163c7e92..a4140f25cf1a 100644
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -466,7 +466,9 @@ static void check_holdout_task(struct task_struct *t,
> {
> if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
> t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
> - !ACCESS_ONCE(t->on_rq)) {
> + !ACCESS_ONCE(t->on_rq) ||
> + (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
> + !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {

rcu_tasks_idle_cpu will be -1 if CONFIG_NO_HZ_FULL is not enabled. Why
are you checking both conditions here?

> ACCESS_ONCE(t->rcu_tasks_holdout) = 0;
> list_del_rcu(&t->rcu_tasks_holdout_list);
> put_task_struct(t);
> --
> 1.8.1.5
>



--
Pranith
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/