[PATCH 2/3] context_tracking: Inherit TIF_NOHZ through forks instead of context switches

From: Frederic Weisbecker
Date: Thu Apr 02 2015 - 13:40:45 EST


TIF_NOHZ is used by context_tracking to force syscall slow-path on every
task in order to track userspace roundtrips. As such, it must be set on
all running tasks.

It's currently explicitly inherited through context switches. There is
no need to do it on this fast-path though. The flag could simply be
set once and for all on all tasks, whether they are running or not.

Let's do this by setting the flag on the init task at early boot and
letting it propagate through fork inheritance.

Suggested-by: Oleg Nesterov <oleg@xxxxxxxxxx>
Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Dave Jones <davej@xxxxxxxxxx>
Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
---
include/linux/context_tracking.h | 10 --------
kernel/context_tracking.c | 52 +++++++++++++++++++++-------------------
kernel/sched/core.c | 1 -
3 files changed, 27 insertions(+), 36 deletions(-)

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 2821838..b96bd29 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -14,8 +14,6 @@ extern void context_tracking_enter(enum ctx_state state);
extern void context_tracking_exit(enum ctx_state state);
extern void context_tracking_user_enter(void);
extern void context_tracking_user_exit(void);
-extern void __context_tracking_task_switch(struct task_struct *prev,
- struct task_struct *next);

static inline void user_enter(void)
{
@@ -51,19 +49,11 @@ static inline void exception_exit(enum ctx_state prev_ctx)
}
}

-static inline void context_tracking_task_switch(struct task_struct *prev,
- struct task_struct *next)
-{
- if (context_tracking_is_enabled())
- __context_tracking_task_switch(prev, next);
-}
#else
static inline void user_enter(void) { }
static inline void user_exit(void) { }
static inline enum ctx_state exception_enter(void) { return 0; }
static inline void exception_exit(enum ctx_state prev_ctx) { }
-static inline void context_tracking_task_switch(struct task_struct *prev,
- struct task_struct *next) { }
#endif /* !CONFIG_CONTEXT_TRACKING */


diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index b9e0b4f..ced8558 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -30,14 +30,6 @@ EXPORT_SYMBOL_GPL(context_tracking_enabled);
DEFINE_PER_CPU(struct context_tracking, context_tracking);
EXPORT_SYMBOL_GPL(context_tracking);

-void context_tracking_cpu_set(int cpu)
-{
- if (!per_cpu(context_tracking.active, cpu)) {
- per_cpu(context_tracking.active, cpu) = true;
- static_key_slow_inc(&context_tracking_enabled);
- }
-}
-
static bool context_tracking_recursion_enter(void)
{
int recursion;
@@ -194,24 +186,34 @@ void context_tracking_user_exit(void)
}
NOKPROBE_SYMBOL(context_tracking_user_exit);

-/**
- * __context_tracking_task_switch - context switch the syscall callbacks
- * @prev: the task that is being switched out
- * @next: the task that is being switched in
- *
- * The context tracking uses the syscall slow path to implement its user-kernel
- * boundaries probes on syscalls. This way it doesn't impact the syscall fast
- * path on CPUs that don't do context tracking.
- *
- * But we need to clear the flag on the previous task because it may later
- * migrate to some CPU that doesn't do the context tracking. As such the TIF
- * flag may not be desired there.
- */
-void __context_tracking_task_switch(struct task_struct *prev,
- struct task_struct *next)
+void context_tracking_cpu_set(int cpu)
{
- clear_tsk_thread_flag(prev, TIF_NOHZ);
- set_tsk_thread_flag(next, TIF_NOHZ);
+ static bool initialized = false;
+ struct task_struct *p, *t;
+ unsigned long flags;
+
+ if (!per_cpu(context_tracking.active, cpu)) {
+ per_cpu(context_tracking.active, cpu) = true;
+ static_key_slow_inc(&context_tracking_enabled);
+ }
+
+ if (initialized)
+ return;
+
+ set_tsk_thread_flag(&init_task, TIF_NOHZ);
+
+ /*
+ * There shouldn't be any thread at this early boot stage
+ * but the scheduler is ready to host any. So lets walk
+ * the tasklist just in case. tasklist_lock isn't necessary
+ * either that early but take it for correctness checkers.
+ */
+ read_lock_irqsave(&tasklist_lock, flags);
+ for_each_process_thread(p, t)
+ set_tsk_thread_flag(t, TIF_NOHZ);
+ read_unlock_irqrestore(&tasklist_lock, flags);
+
+ initialized = true;
}

#ifdef CONFIG_CONTEXT_TRACKING_FORCE
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 06b9a00..7aec5ba 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2312,7 +2312,6 @@ context_switch(struct rq *rq, struct task_struct *prev,
*/
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);

- context_tracking_task_switch(prev, next);
/* Here we just switch the register state and the stack. */
switch_to(prev, next, prev);
barrier();
--
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/