[PATCH 4/4] nohz_full: Allow the boot CPU to be full nohz

From: Nicholas Piggin
Date: Thu Apr 04 2019 - 08:07:35 EST


Allow the boot CPU to be full nohz, and have it take the do_timer duty
temporarily during boot.

nohz_full has been successful at significantly reducing jitter for a
large supercomputer customer, but their job control system requires
CPU0 to be for housekeeping.

This will cause suspend / kexec freeze to occur on a non-boot CPU,
so the option may need to be made conditional by arch?

Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
---
kernel/time/tick-common.c | 50 +++++++++++++++++++++++++++++++++++----
kernel/time/tick-sched.c | 27 +++++++++++----------
2 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 529143b4c8d2..31146c13226e 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -46,6 +46,14 @@ ktime_t tick_period;
* procedure also covers cpu hotplug.
*/
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
+#ifdef CONFIG_NO_HZ_FULL
+/*
+ * tick_do_timer_boot_cpu indicates the boot CPU temporarily owns
+ * tick_do_timer_cpu and it should be taken over by an eligible secondary
+ * when one comes online.
+ */
+static int tick_do_timer_boot_cpu __read_mostly = -1;
+#endif

/*
* Debugging: see timer_list.c
@@ -167,6 +175,26 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
}
}

+#ifdef CONFIG_NO_HZ_FULL
+static void giveup_do_timer(void *info)
+{
+ int cpu = *(unsigned int *)info;
+
+ WARN_ON(tick_do_timer_cpu != smp_processor_id());
+
+ tick_do_timer_cpu = cpu;
+}
+
+static void tick_take_do_timer_from_boot(void)
+{
+ int cpu = smp_processor_id();
+ int from = tick_do_timer_boot_cpu;
+
+ if (from >= 0 && from != cpu)
+ smp_call_function_single(from, giveup_do_timer, &cpu, 1);
+}
+#endif
+
/*
* Setup the tick device
*/
@@ -186,12 +214,26 @@ static void tick_setup_device(struct tick_device *td,
* this cpu:
*/
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
- if (!tick_nohz_full_cpu(cpu))
- tick_do_timer_cpu = cpu;
- else
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ tick_do_timer_cpu = cpu;
+
tick_next_period = ktime_get();
tick_period = NSEC_PER_SEC / HZ;
+#ifdef CONFIG_NO_HZ_FULL
+ /*
+ * The boot CPU may be nohz_full, in which case set
+ * tick_do_timer_boot_cpu so the first housekeeping
+ * secondary that comes up will take do_timer from
+ * us.
+ */
+ if (tick_nohz_full_cpu(cpu))
+ tick_do_timer_boot_cpu = cpu;
+
+ } else if (tick_do_timer_boot_cpu != -1 &&
+ !tick_nohz_full_cpu(cpu)) {
+ tick_take_do_timer_from_boot();
+ tick_do_timer_boot_cpu = -1;
+ WARN_ON(tick_do_timer_cpu != cpu);
+#endif
}

/*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6fa52cd6df0b..c0105bf4ecd9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -121,10 +121,14 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
* into a long sleep. If two CPUs happen to assign themselves to
* this duty, then the jiffies update is still serialized by
* jiffies_lock.
+ *
+ * If nohz_full is enabled, this should not happen because the
+ * tick_do_timer_cpu never relinquishes.
*/
- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
- && !tick_nohz_full_cpu(cpu))
+ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ WARN_ON(tick_nohz_full_running);
tick_do_timer_cpu = cpu;
+ }
#endif

/* Check, if the jiffies need an update */
@@ -395,8 +399,8 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask)
static int tick_nohz_cpu_down(unsigned int cpu)
{
/*
- * The boot CPU handles housekeeping duty (unbound timers,
- * workqueues, timekeeping, ...) on behalf of full dynticks
+ * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
+ * timers, workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
@@ -423,14 +427,6 @@ void __init tick_nohz_init(void)
return;
}

- cpu = smp_processor_id();
-
- if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
- pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
- cpu);
- cpumask_clear_cpu(cpu, tick_nohz_full_mask);
- }
-
for_each_cpu(cpu, tick_nohz_full_mask)
context_tracking_cpu_set(cpu);

@@ -904,8 +900,13 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
/*
* Boot safety: make sure the timekeeping duty has been
* assigned before entering dyntick-idle mode,
+ * tick_do_timer_cpu is TICK_DO_TIMER_BOOT
*/
- if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT))
+ return false;
+
+ /* Should not happen for nohz-full */
+ if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
return false;
}

--
2.20.1