[patch 1/2] nohz: try to avoid IPI when configuring per-CPU posix timer

From: Marcelo Tosatti
Date: Tue Aug 25 2020 - 14:46:33 EST


When enabling per-CPU POSIX timers, an IPI might be sent to nohz_full CPUs
(so that they re-read the tick dependencies and possibly do not re-enter
nohz_full mode on a given CPU).

A common case is for applications that run on nohz_full= CPUs
to not use POSIX timers (e.g. DPDK). On the tick_nohz_dep_set_signal()
path, this patch skips the IPI if the task's allowed CPU mask does not
intersect the nohz_full= CPU mask.

This reduces interruptions to nohz_full= CPUs.

Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx>

---
include/linux/tick.h | 11 +++++++----
kernel/time/posix-cpu-timers.c | 4 ++--
kernel/time/tick-sched.c | 27 +++++++++++++++++++++++++--
3 files changed, 34 insertions(+), 8 deletions(-)

Index: linux-2.6/include/linux/tick.h
===================================================================
--- linux-2.6.orig/include/linux/tick.h
+++ linux-2.6/include/linux/tick.h
@@ -207,7 +207,8 @@ extern void tick_nohz_dep_set_task(struc
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit);
-extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+extern void tick_nohz_dep_set_signal(struct task_struct *tsk,
+ struct signal_struct *signal,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
@@ -252,11 +253,12 @@ static inline void tick_dep_clear_task(s
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
-static inline void tick_dep_set_signal(struct signal_struct *signal,
+static inline void tick_dep_set_signal(struct task_struct *tsk,
+ struct signal_struct *signal,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
- tick_nohz_dep_set_signal(signal, bit);
+ tick_nohz_dep_set_signal(tsk, signal, bit);
}
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit)
@@ -284,7 +286,8 @@ static inline void tick_dep_set_task(str
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
-static inline void tick_dep_set_signal(struct signal_struct *signal,
+static inline void tick_dep_set_signal(struct task_struct *tsk,
+ struct signal_struct *signal,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }
Index: linux-2.6/kernel/time/posix-cpu-timers.c
===================================================================
--- linux-2.6.orig/kernel/time/posix-cpu-timers.c
+++ linux-2.6/kernel/time/posix-cpu-timers.c
@@ -523,7 +523,7 @@ static void arm_timer(struct k_itimer *t
if (CPUCLOCK_PERTHREAD(timer->it_clock))
tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
else
- tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
+ tick_dep_set_signal(p, p->signal, TICK_DEP_BIT_POSIX_TIMER);
}

/*
@@ -1358,7 +1358,7 @@ void set_process_cpu_timer(struct task_s
if (*newval < *nextevt)
*nextevt = *newval;

- tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
+ tick_dep_set_signal(tsk, tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
}

static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
Index: linux-2.6/kernel/time/tick-sched.c
===================================================================
--- linux-2.6.orig/kernel/time/tick-sched.c
+++ linux-2.6/kernel/time/tick-sched.c
@@ -302,6 +302,27 @@ static void tick_nohz_dep_set_all(atomic
}

/*
+ * Set bit on nohz full dependency, kicking all cpus
+ * only if task can run on nohz full CPUs.
+ */
+static void tick_nohz_dep_set_all_cond(struct task_struct *tsk,
+ atomic_t *dep,
+ enum tick_dep_bits bit)
+{
+ int prev;
+ unsigned long flags;
+
+ prev = atomic_fetch_or(BIT(bit), dep);
+ if (prev)
+ return;
+
+ raw_spin_lock_irqsave(&tsk->pi_lock, flags);
+ if (cpumask_intersects(&tsk->cpus_mask, tick_nohz_full_mask))
+ tick_nohz_full_kick_all();
+ raw_spin_unlock_irqrestore(&tsk->pi_lock, flags);
+}
+
+/*
* Set a global tick dependency. Used by perf events that rely on freq and
* by unstable clock.
*/
@@ -382,9 +403,11 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_ta
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
* per process timers.
*/
-void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+void tick_nohz_dep_set_signal(struct task_struct *tsk,
+ struct signal_struct *sig,
+ enum tick_dep_bits bit)
{
- tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+ tick_nohz_dep_set_all_cond(tsk, &sig->tick_dep_mask, bit);
}

void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)