To gather the architecture requirements of the "private/global
expedited" membarrier commands. The file will be expanded to
integrate further information about the membarrier syscall (as
needed/desired in the future). While at it, amend some related
inline comments in the membarrier codebase.
Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Signed-off-by: Andrea Parri <parri.andrea@xxxxxxxxx>
---
Documentation/scheduler/index.rst | 1 +
Documentation/scheduler/membarrier.rst | 37 ++++++++++++++++++++++++++
MAINTAINERS | 1 +
kernel/sched/core.c | 7 ++++-
kernel/sched/membarrier.c | 8 +++---
5 files changed, 49 insertions(+), 5 deletions(-)
create mode 100644 Documentation/scheduler/membarrier.rst
diff --git a/Documentation/scheduler/index.rst b/Documentation/scheduler/index.rst
index 3170747226f6d..43bd8a145b7a9 100644
--- a/Documentation/scheduler/index.rst
+++ b/Documentation/scheduler/index.rst
@@ -7,6 +7,7 @@ Scheduler
completion
+ membarrier
sched-arch
sched-bwc
sched-deadline
diff --git a/Documentation/scheduler/membarrier.rst b/Documentation/scheduler/membarrier.rst
new file mode 100644
index 0000000000000..ab7ee3824b407
--- /dev/null
+++ b/Documentation/scheduler/membarrier.rst
@@ -0,0 +1,37 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+membarrier() System Call
+========================
+
+MEMBARRIER_CMD_{PRIVATE,GLOBAL}_EXPEDITED - Architecture requirements
+=====================================================================
+
+Memory barriers before updating rq->curr
+----------------------------------------
+
+The command requires each architecture to have a full memory barrier after
+coming from user-space, before updating rq->curr. This barrier is implied
+by the sequence rq_lock(); smp_mb__after_spinlock() in __schedule(). The
+barrier matches a full barrier in the proximity of the membarrier system
+call exit, cf. membarrier_{private,global}_expedited().
+
+Memory barriers after updating rq->curr
+---------------------------------------
+
+The command requires each architecture to have a full memory barrier after
+updating rq->curr, before returning to user-space. The schemes providing
+this barrier on the various architectures are as follows.
+
+ - alpha, arc, arm, hexagon, mips rely on the full barrier implied by
+ spin_unlock() in finish_lock_switch().
+
+ - arm64 relies on the full barrier implied by switch_to().
+
+ - powerpc, riscv, s390, sparc, x86 rely on the full barrier implied by
+ switch_mm(), if mm is not NULL; they rely on the full barrier implied
+ by mmdrop(), otherwise. On powerpc and riscv, switch_mm() relies on
+ membarrier_arch_switch_mm().
+
+The barrier matches a full barrier in the proximity of the membarrier system
+call entry, cf. membarrier_{private,global}_expedited().
diff --git a/MAINTAINERS b/MAINTAINERS
index 0f8cec504b2ba..6bce0aeecb4f2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13815,6 +13815,7 @@ M: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
M: "Paul E. McKenney" <paulmck@xxxxxxxxxx>
L: linux-kernel@xxxxxxxxxxxxxxx
S: Supported
+F: Documentation/scheduler/membarrier.rst
F: arch/*/include/asm/membarrier.h
F: include/uapi/linux/membarrier.h
F: kernel/sched/membarrier.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 711dc753f7216..b51bc86f8340c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6599,7 +6599,9 @@ static void __sched notrace __schedule(unsigned int sched_mode)
* if (signal_pending_state()) if (p->state & @state)
*
* Also, the membarrier system call requires a full memory barrier
- * after coming from user-space, before storing to rq->curr.
+ * after coming from user-space, before storing to rq->curr; this
+ * barrier matches a full barrier in the proximity of the membarrier
+ * system call exit.
*/
rq_lock(rq, &rf);
smp_mb__after_spinlock();
@@ -6677,6 +6679,9 @@ static void __sched notrace __schedule(unsigned int sched_mode)
* architectures where spin_unlock is a full barrier,
* - switch_to() for arm64 (weakly-ordered, spin_unlock
* is a RELEASE barrier),
+ *
+ * The barrier matches a full barrier in the proximity of
+ * the membarrier system call entry.
*/
++*switch_count;
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 2ad881d07752c..f3d91628d6b8a 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -251,7 +251,7 @@ static int membarrier_global_expedited(void)
return 0;
/*
- * Matches memory barriers around rq->curr modification in
+ * Matches memory barriers after rq->curr modification in
* scheduler.
*/
smp_mb(); /* system call entry is not a mb. */
@@ -300,7 +300,7 @@ static int membarrier_global_expedited(void)
/*
* Memory barrier on the caller thread _after_ we finished
- * waiting for the last IPI. Matches memory barriers around
+ * waiting for the last IPI. Matches memory barriers before
* rq->curr modification in scheduler.
*/
smp_mb(); /* exit from system call is not a mb */
@@ -339,7 +339,7 @@ static int membarrier_private_expedited(int flags, int cpu_id)
return 0;
/*
- * Matches memory barriers around rq->curr modification in
+ * Matches memory barriers after rq->curr modification in
* scheduler.
*/
smp_mb(); /* system call entry is not a mb. */
@@ -415,7 +415,7 @@ static int membarrier_private_expedited(int flags, int cpu_id)
/*
* Memory barrier on the caller thread _after_ we finished
- * waiting for the last IPI. Matches memory barriers around
+ * waiting for the last IPI. Matches memory barriers before
* rq->curr modification in scheduler.
*/
smp_mb(); /* exit from system call is not a mb */