[patch 1/6] sched/idle: Conditionally handle tick broadcast in default_idle_call()

From: Thomas Gleixner
Date: Thu Feb 29 2024 - 09:24:15 EST


X86 has a idle routine for AMD CPUs which are affected by erratum 400. On
the affected CPUs the local APIC timer stops in the C1E halt state. It
therefore requires tick broadcasting. The invocation of
tick_broadcast_enter()/exit() from this function violates the RCU
constraints because it can end up in lockdep or tracing, which rightfully
triggers a warning.

tick_broadcast_enter()/exit() must be invoked before ct_cpuidle_enter() and
after ct_cpuidle_exit() in default_idle_call().

Add a static branch conditional invocation of tick_broadcast_enter()/exit()
into this function to allow X86 to replace the AMD specific idle code. It's
guarded by a config switch which will be selected by X86. Otherwise it's a
NOOP.

Reported-by: Borislav Petkov <bp@xxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
include/linux/cpu.h | 2 ++
include/linux/tick.h | 3 +++
kernel/sched/idle.c | 21 +++++++++++++++++++++
kernel/time/Kconfig | 5 +++++
4 files changed, 31 insertions(+)

--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -196,6 +196,8 @@ void arch_cpu_idle(void);
void arch_cpu_idle_prepare(void);
void arch_cpu_idle_enter(void);
void arch_cpu_idle_exit(void);
+void arch_tick_broadcast_enter(void);
+void arch_tick_broadcast_exit(void);
void __noreturn arch_cpu_idle_dead(void);

#ifdef CONFIG_ARCH_HAS_CPU_FINALIZE_INIT
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -12,6 +12,7 @@
#include <linux/cpumask.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
+#include <linux/static_key.h>

#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern void __init tick_init(void);
@@ -69,6 +70,8 @@ extern void tick_broadcast_control(enum
static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { }
#endif /* BROADCAST */

+extern struct static_key_false arch_needs_tick_broadcast;
+
#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
extern void tick_offline_cpu(unsigned int cpu);
#else
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -81,6 +81,25 @@ void __weak arch_cpu_idle(void)
cpu_idle_force_poll = 1;
}

+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST_IDLE
+DEFINE_STATIC_KEY_FALSE(arch_needs_tick_broadcast);
+
+static inline void cond_tick_broadcast_enter(void)
+{
+ if (static_branch_unlikely(&arch_needs_tick_broadcast))
+ tick_broadcast_enter();
+}
+
+static inline void cond_tick_broadcast_exit(void)
+{
+ if (static_branch_unlikely(&arch_needs_tick_broadcast))
+ tick_broadcast_exit();
+}
+#else
+static inline void cond_tick_broadcast_enter(void) { }
+static inline void cond_tick_broadcast_exit(void) { }
+#endif
+
/**
* default_idle_call - Default CPU idle routine.
*
@@ -90,6 +109,7 @@ void __cpuidle default_idle_call(void)
{
instrumentation_begin();
if (!current_clr_polling_and_test()) {
+ cond_tick_broadcast_enter();
trace_cpu_idle(1, smp_processor_id());
stop_critical_timings();

@@ -99,6 +119,7 @@ void __cpuidle default_idle_call(void)

start_critical_timings();
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ cond_tick_broadcast_exit();
}
local_irq_enable();
instrumentation_end();
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -39,6 +39,11 @@ config GENERIC_CLOCKEVENTS_BROADCAST
bool
depends on GENERIC_CLOCKEVENTS

+# Handle broadcast in default_idle_call()
+config GENERIC_CLOCKEVENTS_BROADCAST_IDLE
+ bool
+ depends on GENERIC_CLOCKEVENTS_BROADCAST
+
# Automatically adjust the min. reprogramming time for
# clock event device
config GENERIC_CLOCKEVENTS_MIN_ADJUST