[PATCH v2 11/11] mm,sched: conditionally skip lazy TLB mm refcounting
From: Rik van Riel
Date: Sun Jul 29 2018 - 15:55:23 EST
On Sat, 28 Jul 2018 21:21:17 -0700
Andy Lutomirski <luto@xxxxxxxxxx> wrote:
> On Sat, Jul 28, 2018 at 2:53 PM, Rik van Riel <riel@xxxxxxxxxxx> wrote:
> > Conditionally skip lazy TLB mm refcounting. When an architecture has
> > CONFIG_ARCH_NO_ACTIVE_MM_REFCOUNTING enabled, an mm that is used in
> > lazy TLB mode anywhere will get shot down from exit_mmap, and there
> > is no need to incur the cache line bouncing overhead of refcounting
> > a lazy TLB mm.
>
> Unless I've misunderstood something, this patch results in idle tasks
> whose active_mm has been freed still having active_mm pointing at
> freed memory.
---8<---
Author: Rik van Riel <riel@xxxxxxxxxxx>
Subject: [PATCH 11/11] mm,sched: conditionally skip lazy TLB mm refcounting
Conditionally skip lazy TLB mm refcounting. When an architecture has
CONFIG_ARCH_NO_ACTIVE_MM_REFCOUNTING enabled, an mm that is used in
lazy TLB mode anywhere will get shot down from exit_mmap, and there
is no need to incur the cache line bouncing overhead of refcounting
a lazy TLB mm.
Implement this by moving the refcounting of a lazy TLB mm to helper
functions, which skip the refcounting when it is not necessary.
Deal with use_mm and unuse_mm by fully splitting out the refcounting
of the lazy TLB mm a kernel thread may have when entering use_mm from
the refcounting of the mm that use_mm is about to start using.
Signed-off-by: Rik van Riel <riel@xxxxxxxxxxx>
---
arch/x86/mm/tlb.c | 5 +++--
fs/exec.c | 2 +-
include/linux/sched/mm.h | 25 +++++++++++++++++++++++++
kernel/sched/core.c | 6 +++---
mm/mmu_context.c | 21 ++++++++++++++-------
5 files changed, 46 insertions(+), 13 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 425cb9fa2640..d53d9c19b97d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -8,6 +8,7 @@
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/gfp.h>
+#include <linux/sched/mm.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@@ -141,7 +142,7 @@ void leave_mm(void *dummy)
switch_mm(NULL, &init_mm, NULL);
current->active_mm = &init_mm;
- mmdrop(loaded_mm);
+ drop_lazy_mm(loaded_mm);
}
EXPORT_SYMBOL_GPL(leave_mm);
@@ -486,7 +487,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
*/
switch_mm_irqs_off(NULL, &init_mm, NULL);
current->active_mm = &init_mm;
- mmdrop(loaded_mm);
+ drop_lazy_mm(loaded_mm);
return;
}
diff --git a/fs/exec.c b/fs/exec.c
index bdd0eacefdf5..7a6d4811b02b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1043,7 +1043,7 @@ static int exec_mmap(struct mm_struct *mm)
mmput(old_mm);
return 0;
}
- mmdrop(active_mm);
+ drop_lazy_mm(active_mm);
return 0;
}
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 44d356f5e47c..7308bf38012f 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,31 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
}
+/*
+ * In lazy TLB mode, a CPU keeps the mm of the last process mapped while
+ * running a kernel thread or idle; we must make sure the lazy TLB mm and
+ * page tables do not disappear while a lazy TLB mode CPU uses them.
+ * There are two ways to handle the race between lazy TLB CPUs and exit_mmap:
+ * 1) Have a lazy TLB CPU hold a refcount on the lazy TLB mm.
+ * 2) Have the architecture code shoot down the lazy TLB mm from exit_mmap;
+ * in that case, refcounting can be skipped, reducing cache line bouncing.
+ */
+static inline void grab_lazy_mm(struct mm_struct *mm)
+{
+ if (IS_ENABLED(CONFIG_ARCH_NO_ACTIVE_MM_REFCOUNTING))
+ return;
+
+ mmgrab(mm);
+}
+
+static inline void drop_lazy_mm(struct mm_struct *mm)
+{
+ if (IS_ENABLED(CONFIG_ARCH_NO_ACTIVE_MM_REFCOUNTING))
+ return;
+
+ mmdrop(mm);
+}
+
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
* @mm: The address space to pin.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c45de46fdf10..11724c9e88b0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2691,7 +2691,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
*/
if (mm) {
membarrier_mm_sync_core_before_usermode(mm);
- mmdrop(mm);
+ drop_lazy_mm(mm);
}
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)
@@ -2805,7 +2805,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
*/
if (!mm) {
next->active_mm = oldmm;
- mmgrab(oldmm);
+ grab_lazy_mm(oldmm);
enter_lazy_tlb(oldmm, next);
} else
switch_mm_irqs_off(oldmm, mm, next);
@@ -5532,7 +5532,7 @@ void idle_task_exit(void)
current->active_mm = &init_mm;
finish_arch_post_lock_switch();
}
- mmdrop(mm);
+ drop_lazy_mm(mm);
}
/*
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index 3e612ae748e9..d5c2524cdd9a 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -24,12 +24,15 @@ void use_mm(struct mm_struct *mm)
struct mm_struct *active_mm;
struct task_struct *tsk = current;
+ /* Kernel threads have a NULL tsk->mm when entering. */
+ WARN_ON(tsk->mm);
+
task_lock(tsk);
+ /* Previous ->active_mm was held in lazy TLB mode. */
active_mm = tsk->active_mm;
- if (active_mm != mm) {
- mmgrab(mm);
- tsk->active_mm = mm;
- }
+ /* Grab mm for reals; tsk->mm needs to stick around until unuse_mm. */
+ mmgrab(mm);
+ tsk->active_mm = mm;
tsk->mm = mm;
switch_mm(active_mm, mm, tsk);
task_unlock(tsk);
@@ -37,8 +40,9 @@ void use_mm(struct mm_struct *mm)
finish_arch_post_lock_switch();
#endif
- if (active_mm != mm)
- mmdrop(active_mm);
+ /* Drop the lazy TLB mode mm. */
+ if (active_mm)
+ drop_lazy_mm(active_mm);
}
EXPORT_SYMBOL_GPL(use_mm);
@@ -57,8 +61,11 @@ void unuse_mm(struct mm_struct *mm)
task_lock(tsk);
sync_mm_rss(mm);
tsk->mm = NULL;
- /* active_mm is still 'mm' */
+ /* active_mm is still 'mm'; grab it as a lazy TLB mm */
+ grab_lazy_mm(mm);
enter_lazy_tlb(mm, tsk);
+ /* drop the tsk->mm refcount */
+ mmdrop(mm);
task_unlock(tsk);
}
EXPORT_SYMBOL_GPL(unuse_mm);
--
2.14.4