[RFC PATCH for 4.15 4/6] membarrier: Provide core serializing command

From: Mathieu Desnoyers
Date: Wed Nov 08 2017 - 13:35:56 EST


Provide a core serializing membarrier command to support memory reclaim
by JIT.

Each architecture needs to explicitly opt in to this support by
documenting in its architecture code how it provides the core
serializing instructions required when returning from the membarrier
IPI, and after the scheduler has updated the curr->mm pointer (before
returning to user-space). It should then select
ARCH_HAS_MEMBARRIER_SYNC_CORE to enable support for the command on
that architecture.
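
For illustration only (not part of this patch), a user-space JIT could
invoke the new commands roughly as follows. This is a minimal sketch,
assuming a raw syscall(2) wrapper since glibc provides none; the error
handling and the point at which the JIT issues the sync-core barrier
are assumptions:

	/* Sketch of user-space usage of the new sync-core commands. */
	#include <linux/membarrier.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <stdio.h>

	static int membarrier(int cmd, int flags)
	{
		return syscall(__NR_membarrier, cmd, flags);
	}

	int main(void)
	{
		/*
		 * Register intent once, e.g. at JIT start-up. Fails with
		 * -EINVAL if the architecture does not select
		 * ARCH_HAS_MEMBARRIER_SYNC_CORE.
		 */
		if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0)) {
			perror("membarrier register");
			return 1;
		}

		/*
		 * Later, before reclaiming or rewriting JIT'd code: ensure
		 * all running threads of this process have executed a core
		 * serializing instruction before this call returns.
		 */
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0)) {
			perror("membarrier sync-core");
			return 1;
		}
		return 0;
	}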

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
CC: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CC: Andy Lutomirski <luto@xxxxxxxxxx>
CC: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
CC: Boqun Feng <boqun.feng@xxxxxxxxx>
CC: Andrew Hunter <ahh@xxxxxxxxxx>
CC: Maged Michael <maged.michael@xxxxxxxxx>
CC: gromer@xxxxxxxxxx
CC: Avi Kivity <avi@xxxxxxxxxxxx>
CC: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
CC: Paul Mackerras <paulus@xxxxxxxxx>
CC: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
CC: Dave Watson <davejwatson@xxxxxx>
CC: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CC: Ingo Molnar <mingo@xxxxxxxxxx>
CC: "H. Peter Anvin" <hpa@xxxxxxxxx>
CC: Andrea Parri <parri.andrea@xxxxxxxxx>
---
include/linux/sched/mm.h | 9 +++++--
include/uapi/linux/membarrier.h | 14 ++++++++---
init/Kconfig | 3 +++
kernel/sched/membarrier.c | 56 ++++++++++++++++++++++++++++++-----------
4 files changed, 61 insertions(+), 21 deletions(-)

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index ff1d510a9c51..a888da398517 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -219,8 +219,13 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)

#ifdef CONFIG_MEMBARRIER
enum {
- MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0),
- MEMBARRIER_STATE_SWITCH_MM = (1U << 1),
+ MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0),
+ MEMBARRIER_STATE_SWITCH_MM = (1U << 1),
+ MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY = (1U << 2),
+};
+
+enum {
+ MEMBARRIER_FLAG_SYNC_CORE = (1U << 0),
};

#ifdef CONFIG_ARCH_HAS_MEMBARRIER_HOOKS
diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h
index 4e01ad7ffe98..4dd4715a6c08 100644
--- a/include/uapi/linux/membarrier.h
+++ b/include/uapi/linux/membarrier.h
@@ -64,18 +64,24 @@
* Register the process intent to use
* MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always
* returns 0.
+ * @MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
+ * Like MEMBARRIER_CMD_PRIVATE_EXPEDITED, but additionally guarantee that all running threads of the caller's process execute a core serializing instruction before the system call returns.
+ * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
+ * Register the process intent to use MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE. Always returns 0.
*
* Command to be passed to the membarrier system call. The commands need to
* be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
* the value 0.
*/
enum membarrier_cmd {
- MEMBARRIER_CMD_QUERY = 0,
- MEMBARRIER_CMD_SHARED = (1 << 0),
+ MEMBARRIER_CMD_QUERY = 0,
+ MEMBARRIER_CMD_SHARED = (1 << 0),
/* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */
/* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */
- MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3),
- MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4),
+ MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3),
+ MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4),
+ MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE = (1 << 5),
+ MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE = (1 << 6),
};

#endif /* _UAPI_LINUX_MEMBARRIER_H */
diff --git a/init/Kconfig b/init/Kconfig
index ba7f17971554..ea4a7ce10328 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1403,6 +1403,9 @@ config MEMBARRIER
config ARCH_HAS_MEMBARRIER_HOOKS
bool

+config ARCH_HAS_MEMBARRIER_SYNC_CORE
+ bool
+
config EMBEDDED
bool "Embedded system"
option allnoconfig_y
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 23bd256f8458..6b3cbaa03ed7 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -26,24 +26,41 @@
* Bitmask made from a "or" of all commands within enum membarrier_cmd,
* except MEMBARRIER_CMD_QUERY.
*/
+#ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
+#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \
+ (MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE \
+ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
+#else
+#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK 0
+#endif
+
#define MEMBARRIER_CMD_BITMASK \
(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED \
- | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
+ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
+ | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)

static void ipi_mb(void *info)
{
smp_mb(); /* IPIs should be serializing but paranoid. */
}

-static int membarrier_private_expedited(void)
+static int membarrier_private_expedited(int flags)
{
int cpu;
bool fallback = false;
cpumask_var_t tmpmask;

- if (!(atomic_read(&current->mm->membarrier_state)
- & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
- return -EPERM;
+ if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
+ return -EINVAL;
+ if (!(atomic_read(&current->mm->membarrier_state)
+ & MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
+ return -EPERM;
+ } else {
+ if (!(atomic_read(&current->mm->membarrier_state)
+ & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
+ return -EPERM;
+ }

if (num_online_cpus() == 1)
return 0;
@@ -103,22 +120,28 @@ static int membarrier_private_expedited(void)
return 0;
}

-static void membarrier_register_private_expedited(void)
+static int membarrier_register_private_expedited(int flags)
{
struct task_struct *p = current;
struct mm_struct *mm = p->mm;
+ int state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY;
+
+ if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
+ if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
+ return -EINVAL;
+ state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
+ }

/*
* We need to consider threads belonging to different thread
* groups, which use the same mm. (CLONE_VM but not
* CLONE_THREAD).
*/
- if (atomic_read(&mm->membarrier_state)
- & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
- return;
- membarrier_arch_register_private_expedited(p);
- atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
- &mm->membarrier_state);
+ if (atomic_read(&mm->membarrier_state) & state)
+ return 0;
+ membarrier_arch_register_private_expedited(p, flags);
+ atomic_or(state, &mm->membarrier_state);
+ return 0;
}

/**
@@ -169,10 +192,13 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
synchronize_sched();
return 0;
case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
- return membarrier_private_expedited();
+ return membarrier_private_expedited(0);
case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
- membarrier_register_private_expedited();
- return 0;
+ return membarrier_register_private_expedited(0);
+ case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
+ return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
+ case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
+ return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
default:
return -EINVAL;
}
--
2.11.0