Re: [PATCH 10/21] context_tracking: Take idle eqs entrypoints over RCU

From: Mark Rutland
Date: Tue May 03 2022 - 07:03:05 EST


Hi Frederic,

On Tue, May 03, 2022 at 12:00:40PM +0200, Frederic Weisbecker wrote:
> The RCU dynticks counter is going to be merged into the context tracking
> subsystem. Start with moving the idle extended quiescent states
> entrypoints to context tracking. For now those are dumb redirections to
> existing RCU calls.

I was a bit confused looking at this, because that redirection only exists for
CONFIG_CONTEXT_TRACKING, and is empty otherwise.

I see this patch makes TREE_RCU select CONTEXT_TRACKING, which means that
works. Since that also means building the rest of the context tracking code, I
think it'd be worth mentioning that in the commit message.

Do all architectures which can use TREE_RCU today already support context
tracking? If not, do those work by default?

Thanks,
Mark.

> Signed-off-by: Frederic Weisbecker <frederic@xxxxxxxxxx>
> Cc: Paul E. McKenney <paulmck@xxxxxxxxxx>
> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Neeraj Upadhyay <quic_neeraju@xxxxxxxxxxx>
> Cc: Uladzislau Rezki <uladzislau.rezki@xxxxxxxx>
> Cc: Joel Fernandes <joel@xxxxxxxxxxxxxxxxx>
> Cc: Boqun Feng <boqun.feng@xxxxxxxxx>
> Cc: Nicolas Saenz Julienne <nsaenz@xxxxxxxxxx>
> Cc: Marcelo Tosatti <mtosatti@xxxxxxxxxx>
> Cc: Xiongfeng Wang <wangxiongfeng2@xxxxxxxxxx>
> Cc: Yu Liao<liaoyu15@xxxxxxxxxx>
> Cc: Phil Auld <pauld@xxxxxxxxxx>
> Cc: Paul Gortmaker<paul.gortmaker@xxxxxxxxxxxxx>
> Cc: Alex Belits <abelits@xxxxxxxxxxx>
> ---
> Documentation/RCU/stallwarn.rst | 4 ++--
> arch/arm/mach-imx/cpuidle-imx6q.c | 5 +++--
> drivers/acpi/processor_idle.c | 5 +++--
> drivers/cpuidle/cpuidle.c | 9 +++++----
> include/linux/context_tracking.h | 8 ++++++++
> include/linux/rcupdate.h | 2 +-
> kernel/context_tracking.c | 12 ++++++++++++
> kernel/locking/lockdep.c | 2 +-
> kernel/rcu/Kconfig | 2 ++
> kernel/rcu/tree.c | 2 --
> kernel/rcu/update.c | 2 +-
> kernel/sched/idle.c | 10 +++++-----
> kernel/sched/sched.h | 1 +
> 13 files changed, 44 insertions(+), 20 deletions(-)
>
> diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst
> index 1d863b04727c..bdd52b40f307 100644
> --- a/Documentation/RCU/stallwarn.rst
> +++ b/Documentation/RCU/stallwarn.rst
> @@ -97,8 +97,8 @@ warnings:
> which will include additional debugging information.
>
> - A low-level kernel issue that either fails to invoke one of the
> - variants of rcu_user_enter(), rcu_user_exit(), rcu_idle_enter(),
> - rcu_idle_exit(), rcu_irq_enter(), or rcu_irq_exit() on the one
> + variants of rcu_user_enter(), rcu_user_exit(), ct_idle_enter(),
> + ct_idle_exit(), rcu_irq_enter(), or rcu_irq_exit() on the one
> hand, or that invokes one of them too many times on the other.
> Historically, the most frequent issue has been an omission
> of either irq_enter() or irq_exit(), which in turn invoke
> diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c
> index 094337dc1bc7..d086cbae09c3 100644
> --- a/arch/arm/mach-imx/cpuidle-imx6q.c
> +++ b/arch/arm/mach-imx/cpuidle-imx6q.c
> @@ -3,6 +3,7 @@
> * Copyright (C) 2012 Freescale Semiconductor, Inc.
> */
>
> +#include <linux/context_tracking.h>
> #include <linux/cpuidle.h>
> #include <linux/module.h>
> #include <asm/cpuidle.h>
> @@ -24,9 +25,9 @@ static int imx6q_enter_wait(struct cpuidle_device *dev,
> imx6_set_lpm(WAIT_UNCLOCKED);
> raw_spin_unlock(&cpuidle_lock);
>
> - rcu_idle_enter();
> + ct_idle_enter();
> cpu_do_idle();
> - rcu_idle_exit();
> + ct_idle_exit();
>
> raw_spin_lock(&cpuidle_lock);
> if (num_idle_cpus-- == num_online_cpus())
> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
> index 32b20efff5f8..935f4113d5f6 100644
> --- a/drivers/acpi/processor_idle.c
> +++ b/drivers/acpi/processor_idle.c
> @@ -22,6 +22,7 @@
> #include <linux/cpu.h>
> #include <linux/minmax.h>
> #include <acpi/processor.h>
> +#include <linux/context_tracking.h>
>
> /*
> * Include the apic definitions for x86 to have the APIC timer related defines
> @@ -648,11 +649,11 @@ static int acpi_idle_enter_bm(struct cpuidle_driver *drv,
> raw_spin_unlock(&c3_lock);
> }
>
> - rcu_idle_enter();
> + ct_idle_enter();
>
> acpi_idle_do_entry(cx);
>
> - rcu_idle_exit();
> + ct_idle_exit();
>
> /* Re-enable bus master arbitration */
> if (dis_bm) {
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index ef2ea1b12cd8..62dd956025f3 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -23,6 +23,7 @@
> #include <linux/suspend.h>
> #include <linux/tick.h>
> #include <linux/mmu_context.h>
> +#include <linux/context_tracking.h>
> #include <trace/events/power.h>
>
> #include "cpuidle.h"
> @@ -150,12 +151,12 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
> */
> stop_critical_timings();
> if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
> - rcu_idle_enter();
> + ct_idle_enter();
> target_state->enter_s2idle(dev, drv, index);
> if (WARN_ON_ONCE(!irqs_disabled()))
> local_irq_disable();
> if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
> - rcu_idle_exit();
> + ct_idle_exit();
> tick_unfreeze();
> start_critical_timings();
>
> @@ -233,10 +234,10 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
>
> stop_critical_timings();
> if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
> - rcu_idle_enter();
> + ct_idle_enter();
> entered_state = target_state->enter(dev, drv, index);
> if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
> - rcu_idle_exit();
> + ct_idle_exit();
> start_critical_timings();
>
> sched_clock_idle_wakeup_event();
> diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
> index e35ae66b4794..27afb75f2650 100644
> --- a/include/linux/context_tracking.h
> +++ b/include/linux/context_tracking.h
> @@ -119,4 +119,12 @@ extern void context_tracking_init(void);
> static inline void context_tracking_init(void) { }
> #endif /* CONFIG_CONTEXT_TRACKING_USER_FORCE */
>
> +#ifdef CONFIG_CONTEXT_TRACKING
> +extern void ct_idle_enter(void);
> +extern void ct_idle_exit(void);
> +#else
> +static inline void ct_idle_enter(void) { }
> +static inline void ct_idle_exit(void) { }
> +#endif /* !CONFIG_CONTEXT_TRACKING */
> +
> #endif
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 7f12daa4708b..d3b25e65f144 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -129,7 +129,7 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { }
> * @a: Code that RCU needs to pay attention to.
> *
> * RCU read-side critical sections are forbidden in the inner idle loop,
> - * that is, between the rcu_idle_enter() and the rcu_idle_exit() -- RCU
> + * that is, between the ct_idle_enter() and the ct_idle_exit() -- RCU
> * will happily ignore any such read-side critical sections. However,
> * things like powertop need tracepoints in the inner idle loop.
> *


> diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
> index da067a36b326..987f0b2a3962 100644
> --- a/kernel/context_tracking.c
> +++ b/kernel/context_tracking.c
> @@ -251,3 +251,15 @@ void __init context_tracking_init(void)
> #endif
>
> #endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */
> +
> +noinstr void ct_idle_enter(void)
> +{
> + rcu_idle_enter();
> +}
> +EXPORT_SYMBOL_GPL(ct_idle_enter);
> +
> +void ct_idle_exit(void)
> +{
> + rcu_idle_exit();
> +}
> +EXPORT_SYMBOL_GPL(ct_idle_exit);
> diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
> index c06cab6546ed..5f0dfe37234b 100644
> --- a/kernel/locking/lockdep.c
> +++ b/kernel/locking/lockdep.c
> @@ -6546,7 +6546,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
>
> /*
> * If a CPU is in the RCU-free window in idle (ie: in the section
> - * between rcu_idle_enter() and rcu_idle_exit(), then RCU
> + * between ct_idle_enter() and ct_idle_exit(), then RCU
> * considers that CPU to be in an "extended quiescent state",
> * which means that RCU will be completely ignoring that CPU.
> * Therefore, rcu_read_lock() and friends have absolutely no
> diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
> index 27aab870ae4c..85f3b884fa09 100644
> --- a/kernel/rcu/Kconfig
> +++ b/kernel/rcu/Kconfig
> @@ -8,6 +8,8 @@ menu "RCU Subsystem"
> config TREE_RCU
> bool
> default y if SMP
> + # Dynticks-idle tracking
> + select CONTEXT_TRACKING
> help
> This option selects the RCU implementation that is
> designed for very large SMP system with hundreds or
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 2a6e7723fc4d..92a288668e7c 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -664,7 +664,6 @@ noinstr void rcu_idle_enter(void)
> lockdep_assert_irqs_disabled();
> rcu_eqs_enter(false);
> }
> -EXPORT_SYMBOL_GPL(rcu_idle_enter);
>
> #ifdef CONFIG_NO_HZ_FULL
>
> @@ -912,7 +911,6 @@ void rcu_idle_exit(void)
> rcu_eqs_exit(false);
> local_irq_restore(flags);
> }
> -EXPORT_SYMBOL_GPL(rcu_idle_exit);
>
> #ifdef CONFIG_NO_HZ_FULL
> /**
> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> index 2e93acad1e31..738842c4886b 100644
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -85,7 +85,7 @@ module_param(rcu_normal_after_boot, int, 0444);
> * and while lockdep is disabled.
> *
> * Note that if the CPU is in the idle loop from an RCU point of view (ie:
> - * that we are in the section between rcu_idle_enter() and rcu_idle_exit())
> + * that we are in the section between ct_idle_enter() and ct_idle_exit())
> * then rcu_read_lock_held() sets ``*ret`` to false even if the CPU did an
> * rcu_read_lock(). The reason for this is that RCU ignores CPUs that are
> * in such a section, considering these as in extended quiescent state,
> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index 8f8b5020e76a..6de222b23b49 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -53,14 +53,14 @@ static noinline int __cpuidle cpu_idle_poll(void)
> {
> trace_cpu_idle(0, smp_processor_id());
> stop_critical_timings();
> - rcu_idle_enter();
> + ct_idle_enter();
> local_irq_enable();
>
> while (!tif_need_resched() &&
> (cpu_idle_force_poll || tick_check_broadcast_expired()))
> cpu_relax();
>
> - rcu_idle_exit();
> + ct_idle_exit();
> start_critical_timings();
> trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
>
> @@ -98,12 +98,12 @@ void __cpuidle default_idle_call(void)
> *
> * Trace IRQs enable here, then switch off RCU, and have
> * arch_cpu_idle() use raw_local_irq_enable(). Note that
> - * rcu_idle_enter() relies on lockdep IRQ state, so switch that
> + * ct_idle_enter() relies on lockdep IRQ state, so switch that
> * last -- this is very similar to the entry code.
> */
> trace_hardirqs_on_prepare();
> lockdep_hardirqs_on_prepare(_THIS_IP_);
> - rcu_idle_enter();
> + ct_idle_enter();
> lockdep_hardirqs_on(_THIS_IP_);
>
> arch_cpu_idle();
> @@ -116,7 +116,7 @@ void __cpuidle default_idle_call(void)
> */
> raw_local_irq_disable();
> lockdep_hardirqs_off(_THIS_IP_);
> - rcu_idle_exit();
> + ct_idle_exit();
> lockdep_hardirqs_on(_THIS_IP_);
> raw_local_irq_enable();
>
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 58263f90c559..f398f2bf05f4 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -27,6 +27,7 @@
> #include <linux/capability.h>
> #include <linux/cgroup_api.h>
> #include <linux/cgroup.h>
> +#include <linux/context_tracking.h>
> #include <linux/cpufreq.h>
> #include <linux/cpumask_api.h>
> #include <linux/ctype.h>
> --
> 2.25.1
>