[RFC][PATCH v2 1/5] tick/nohz: Introduce tick_get_sleep_length()

From: Peter Zijlstra
Date: Wed Aug 02 2023 - 09:31:46 EST


Add tick_get_sleep_length(), a variant of tick_nohz_get_sleep_length()
that only does the NOHZ part when asked to.

tick_get_sleep_length(false) returns the value that
tick_nohz_get_sleep_length() stores through its @delta_next pointer,
while tick_get_sleep_length(true) returns the same value as
tick_nohz_get_sleep_length() itself.

This allows callers that pass nohz == false to elide the call to
tick_nohz_next_event(), which is going to be expensive with timer-pull.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/tick.h | 5 +++++
kernel/time/tick-sched.c | 35 +++++++++++++++++++++++------------
2 files changed, 28 insertions(+), 12 deletions(-)

--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -136,6 +136,7 @@ extern void tick_nohz_irq_exit(void);
extern bool tick_nohz_idle_got_tick(void);
extern ktime_t tick_nohz_get_next_hrtimer(void);
extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next);
+extern ktime_t tick_get_sleep_length(bool nohz);
extern unsigned long tick_nohz_get_idle_calls(void);
extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
@@ -168,6 +169,10 @@ static inline ktime_t tick_nohz_get_slee
*delta_next = TICK_NSEC;
return *delta_next;
}
+static inline ktime_t tick_get_sleep_length(bool nohz)
+{
+ return TICK_NSEC;
+}
static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }

--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1218,17 +1218,7 @@ ktime_t tick_nohz_get_next_hrtimer(void)
return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
}

-/**
- * tick_nohz_get_sleep_length - return the expected length of the current sleep
- * @delta_next: duration until the next event if the tick cannot be stopped
- *
- * Called from power state control code with interrupts disabled.
- *
- * The return value of this function and/or the value returned by it through the
- * @delta_next pointer can be negative which must be taken into account by its
- * callers.
- */
-ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
+static ktime_t __tick_nohz_get_sleep_length(ktime_t *delta_next, bool nohz)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
@@ -1244,7 +1234,7 @@ ktime_t tick_nohz_get_sleep_length(ktime

*delta_next = ktime_sub(dev->next_event, now);

- if (!can_stop_idle_tick(cpu, ts))
+ if (!nohz || !can_stop_idle_tick(cpu, ts))
return *delta_next;

next_event = tick_nohz_next_event(ts, cpu);
@@ -1262,6 +1252,27 @@ ktime_t tick_nohz_get_sleep_length(ktime
}

/**
+ * tick_nohz_get_sleep_length - return the expected length of the current sleep
+ * @delta_next: duration until the next event if the tick cannot be stopped
+ *
+ * Called from power state control code with interrupts disabled.
+ *
+ * The return value of this function and/or the value returned by it through the
+ * @delta_next pointer can be negative which must be taken into account by its
+ * callers.
+ */
+ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
+{
+ return __tick_nohz_get_sleep_length(delta_next, true);
+}
+
+ktime_t tick_get_sleep_length(bool nohz)
+{
+ ktime_t delta;
+ return __tick_nohz_get_sleep_length(&delta, nohz);
+}
+
+/**
* tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
* for a particular CPU.
*