[PATCH v4 2/4] lazy tlb: allow lazy tlb mm refcounting to be configurable

From: Nicholas Piggin
Date: Fri Jun 04 2021 - 21:42:40 EST


Add CONFIG_MMU_LAZY_TLB_REFCOUNT which enables refcounting of the lazy tlb mm
when it is context switched. This can be disabled by architectures that
don't require this refcounting if they clean up lazy tlb mms when the
last refcount is dropped. Currently this is always enabled, which is
what existing code does, so the patch is effectively a no-op.

Rename rq->prev_mm to rq->prev_lazy_mm, because that's what it is.

Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
---
arch/Kconfig | 4 ++++
include/linux/sched/mm.h | 13 +++++++++++--
kernel/sched/core.c | 22 ++++++++++++++++++----
kernel/sched/sched.h | 4 +++-
4 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index c45b770d3579..1cff045cdde6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -418,6 +418,10 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
irqs disabled over activate_mm. Architectures that do IPI based TLB
shootdowns should enable this.

+# Use normal mm refcounting for MMU_LAZY_TLB kernel thread references.
+config MMU_LAZY_TLB_REFCOUNT
+ def_bool y
+
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index bfd1baca5266..29e4638ad124 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -52,12 +52,21 @@ static inline void mmdrop(struct mm_struct *mm)
/* Helpers for lazy TLB mm refcounting */
static inline void mmgrab_lazy_tlb(struct mm_struct *mm)
{
- mmgrab(mm);
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
+ mmgrab(mm);
}

static inline void mmdrop_lazy_tlb(struct mm_struct *mm)
{
- mmdrop(mm);
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) {
+ mmdrop(mm);
+ } else {
+ /*
+ * mmdrop_lazy_tlb must provide a full memory barrier, see the
+ * membarrier comment in finish_task_switch() which relies on this.
+ */
+ smp_mb();
+ }
}

/**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e359c76ea2e2..5e10cb712be3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4171,7 +4171,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
__releases(rq->lock)
{
struct rq *rq = this_rq();
- struct mm_struct *mm = rq->prev_mm;
+ struct mm_struct *mm = NULL;
long prev_state;

/*
@@ -4190,7 +4190,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
current->comm, current->pid, preempt_count()))
preempt_count_set(FORK_PREEMPT_COUNT);

- rq->prev_mm = NULL;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ mm = rq->prev_lazy_mm;
+ rq->prev_lazy_mm = NULL;
+#endif

/*
* A task struct has one reference for the use as "current".
@@ -4326,9 +4329,20 @@ context_switch(struct rq *rq, struct task_struct *prev,
switch_mm_irqs_off(prev->active_mm, next->mm, next);

if (!prev->mm) { // from kernel
- /* will mmdrop_lazy_tlb() in finish_task_switch(). */
- rq->prev_mm = prev->active_mm;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ /* Will mmdrop_lazy_tlb() in finish_task_switch(). */
+ rq->prev_lazy_mm = prev->active_mm;
prev->active_mm = NULL;
+#else
+ /*
+ * Without MMU_LAZY_TLB_REFCOUNT there is no lazy
+ * tracking (because no rq->prev_lazy_mm) in
+ * finish_task_switch, so no mmdrop_lazy_tlb(), so no
+ * memory barrier for membarrier (see the membarrier
+ * comment in finish_task_switch()). Do it here.
+ */
+ smp_mb();
+#endif
}
}

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a189bec13729..0729cf19a987 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -961,7 +961,9 @@ struct rq {
struct task_struct *idle;
struct task_struct *stop;
unsigned long next_balance;
- struct mm_struct *prev_mm;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ struct mm_struct *prev_lazy_mm;
+#endif

unsigned int clock_update_flags;
u64 clock;
--
2.23.0