[RFC PATCH for 4.14] membarrier powerpc: Move hook to switch_mm()

From: Mathieu Desnoyers
Date: Thu Sep 28 2017 - 16:21:57 EST


Nick has a valid point that the sched_in() hook is a fast-path compared
to switch_mm(). Adding an extra TIF test in a fast-path to save a
barrier in a comparatively slow-path is therefore not such a good idea
overall.

Therefore, move the architecture hook to switch_mm() instead.

[ This patch is aimed at Paul's tree. It applies on top of
"membarrier: Provide register expedited private command (v4)" and
"membarrier: Document scheduler barrier requirements (v4)". ]

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
CC: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CC: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
CC: Nicholas Piggin <npiggin@xxxxxxxxx>
CC: Boqun Feng <boqun.feng@xxxxxxxxx>
CC: Andrew Hunter <ahh@xxxxxxxxxx>
CC: Maged Michael <maged.michael@xxxxxxxxx>
CC: gromer@xxxxxxxxxx
CC: Avi Kivity <avi@xxxxxxxxxxxx>
CC: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
CC: Paul Mackerras <paulus@xxxxxxxxx>
CC: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
CC: Dave Watson <davejwatson@xxxxxx>
CC: Alan Stern <stern@xxxxxxxxxxxxxxxxxxx>
CC: Will Deacon <will.deacon@xxxxxxx>
CC: Andy Lutomirski <luto@xxxxxxxxxx>
CC: Ingo Molnar <mingo@xxxxxxxxxx>
CC: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx>
CC: linuxppc-dev@xxxxxxxxxxxxxxxx
CC: linux-arch@xxxxxxxxxxxxxxx
---
arch/powerpc/include/asm/membarrier.h | 9 ++++-----
arch/powerpc/mm/mmu_context.c | 7 +++++++
include/linux/sched/mm.h | 13 ++++---------
kernel/sched/core.c | 6 ++----
4 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h
index 588154c1cf57..61152a7a3cf9 100644
--- a/arch/powerpc/include/asm/membarrier.h
+++ b/arch/powerpc/include/asm/membarrier.h
@@ -1,8 +1,8 @@
#ifndef _ASM_POWERPC_MEMBARRIER_H
#define _ASM_POWERPC_MEMBARRIER_H

-static inline void membarrier_arch_sched_in(struct task_struct *prev,
- struct task_struct *next)
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+ struct mm_struct *next, struct task_struct *tsk)
{
/*
* Only need the full barrier when switching between processes.
@@ -11,9 +11,8 @@ static inline void membarrier_arch_sched_in(struct task_struct *prev,
* when switching from userspace to kernel is not needed after
* store to rq->curr.
*/
- if (likely(!test_ti_thread_flag(task_thread_info(next),
- TIF_MEMBARRIER_PRIVATE_EXPEDITED)
- || !prev->mm || prev->mm == next->mm))
+ if (likely(!test_ti_thread_flag(task_thread_info(tsk),
+ TIF_MEMBARRIER_PRIVATE_EXPEDITED) || !prev))
return;

/*
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 0f613bc63c50..22f5c91cdc38 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -12,6 +12,7 @@

#include <linux/mm.h>
#include <linux/cpu.h>
+#include <linux/sched/mm.h>

#include <asm/mmu_context.h>

@@ -67,6 +68,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
*
* On the read side the barrier is in pte_xchg(), which orders
* the store to the PTE vs the load of mm_cpumask.
+ *
+ * This full barrier is needed by membarrier when switching
+ * between processes after store to rq->curr, before user-space
+ * memory accesses.
*/
smp_mb();

@@ -89,6 +94,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,

if (new_on_cpu)
radix_kvm_prefetch_workaround(next);
+ else
+ membarrier_arch_switch_mm(prev, next, tsk);

/*
* The actual HW switching method differs between the various
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 1bd10c2c0893..d5a9ab8f3836 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -215,8 +215,8 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_HOOKS
#include <asm/membarrier.h>
#else
-static inline void membarrier_arch_sched_in(struct task_struct *prev,
- struct task_struct *next)
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+ struct mm_struct *next, struct task_struct *tsk)
{
}
static inline void membarrier_arch_fork(struct task_struct *t,
@@ -232,11 +232,6 @@ static inline void membarrier_arch_register_private_expedited(
}
#endif

-static inline void membarrier_sched_in(struct task_struct *prev,
- struct task_struct *next)
-{
- membarrier_arch_sched_in(prev, next);
-}
static inline void membarrier_fork(struct task_struct *t,
unsigned long clone_flags)
{
@@ -252,8 +247,8 @@ static inline void membarrier_execve(struct task_struct *t)
membarrier_arch_execve(t);
}
#else
-static inline void membarrier_sched_in(struct task_struct *prev,
- struct task_struct *next)
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+ struct mm_struct *next, struct task_struct *tsk)
{
}
static inline void membarrier_fork(struct task_struct *t,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6254f87645de..8585abebe32b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2643,7 +2643,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
*/
prev_state = prev->state;
vtime_task_switch(prev);
- membarrier_sched_in(prev, current);
perf_event_task_sched_in(prev, current);
finish_lock_switch(rq, prev);
finish_arch_post_lock_switch();
@@ -3357,13 +3356,12 @@ static void __sched notrace __schedule(bool preempt)
*
* Here are the schemes providing that barrier on the
* various architectures:
- * - mm ? switch_mm() : mmdrop() for x86, s390, sparc,
+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC.
+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC.
* - finish_lock_switch() for weakly-ordered
* architectures where spin_unlock is a full barrier,
* - switch_to() for arm64 (weakly-ordered, spin_unlock
* is a RELEASE barrier),
- * - membarrier_arch_sched_in() for PowerPC,
- * (weakly-ordered, spin_unlock is a RELEASE barrier).
*/
++*switch_count;

--
2.11.0