[tip:x86/mm] x86/mm: Stop calling leave_mm() in idle code

From: tip-bot for Andy Lutomirski
Date: Wed Jul 05 2017 - 06:37:56 EST


Commit-ID: 43858b4f25cf0adc5c2ca9cf5ce5fdf2532941e5
Gitweb: http://git.kernel.org/tip/43858b4f25cf0adc5c2ca9cf5ce5fdf2532941e5
Author: Andy Lutomirski <luto@xxxxxxxxxx>
AuthorDate: Thu, 29 Jun 2017 08:53:18 -0700
Committer: Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Wed, 5 Jul 2017 10:52:57 +0200

x86/mm: Stop calling leave_mm() in idle code

Now that lazy TLB suppresses all flush IPIs (as opposed to all but
the first), there's no need to leave_mm() when going idle.

This means we can get rid of the rcuidle hack in
switch_mm_irqs_off() and we can unexport leave_mm().

This also removes acpi_unlazy_tlb() from the x86 and ia64 headers,
since it has no callers any more.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
Reviewed-by: Nadav Amit <nadav.amit@xxxxxxxxx>
Reviewed-by: Borislav Petkov <bp@xxxxxxx>
Reviewed-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Arjan van de Ven <arjan@xxxxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Cc: linux-mm@xxxxxxxxx
Link: http://lkml.kernel.org/r/03c699cfd6021e467be650d6b73deaccfe4b4bd7.1498751203.git.luto@xxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
arch/ia64/include/asm/acpi.h | 2 --
arch/x86/include/asm/acpi.h | 2 --
arch/x86/mm/tlb.c | 20 +++-----------------
drivers/acpi/processor_idle.c | 2 --
drivers/idle/intel_idle.c | 9 ++++-----
5 files changed, 7 insertions(+), 28 deletions(-)

diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h
index a3d0211..c86a947 100644
--- a/arch/ia64/include/asm/acpi.h
+++ b/arch/ia64/include/asm/acpi.h
@@ -112,8 +112,6 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
}

-#define acpi_unlazy_tlb(x)
-
#ifdef CONFIG_ACPI_NUMA
extern cpumask_t early_cpu_possible_map;
#define for_each_possible_early_cpu(cpu) \
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 2efc768..562286f 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -150,8 +150,6 @@ static inline void disable_acpi(void) { }
extern int x86_acpi_numa_init(void);
#endif /* CONFIG_ACPI_NUMA */

-#define acpi_unlazy_tlb(x) leave_mm(x)
-
#ifdef CONFIG_ACPI_APEI
static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
{
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 0982c99..2c1b888 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -50,7 +50,6 @@ void leave_mm(int cpu)

switch_mm(NULL, &init_mm, NULL);
}
-EXPORT_SYMBOL_GPL(leave_mm);

void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
@@ -117,15 +116,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen,
next_tlb_gen);
write_cr3(__pa(next->pgd));
-
- /*
- * This gets called via leave_mm() in the idle path
- * where RCU functions differently. Tracing normally
- * uses RCU, so we have to call the tracepoint
- * specially here.
- */
- trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
- TLB_FLUSH_ALL);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
+ TLB_FLUSH_ALL);
}

/*
@@ -167,13 +159,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
this_cpu_write(cpu_tlbstate.loaded_mm, next);
write_cr3(__pa(next->pgd));

- /*
- * This gets called via leave_mm() in the idle path where RCU
- * functions differently. Tracing normally uses RCU, so we
- * have to call the tracepoint specially here.
- */
- trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
- TLB_FLUSH_ALL);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
}

load_mm_cr4(next);
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 5c8aa9c..fe3d2a4 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -708,8 +708,6 @@ static DEFINE_RAW_SPINLOCK(c3_lock);
static void acpi_idle_enter_bm(struct acpi_processor *pr,
struct acpi_processor_cx *cx, bool timer_bc)
{
- acpi_unlazy_tlb(smp_processor_id());
-
/*
* Must be done before busmaster disable as we might need to
* access HPET !
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 216d7ec..2ae43f5 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -912,16 +912,15 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
struct cpuidle_state *state = &drv->states[index];
unsigned long eax = flg2MWAIT(state->flags);
unsigned int cstate;
- int cpu = smp_processor_id();

cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;

/*
- * leave_mm() to avoid costly and often unnecessary wakeups
- * for flushing the user TLB's associated with the active mm.
+ * NB: if CPUIDLE_FLAG_TLB_FLUSHED is set, this idle transition
+ * will probably flush the TLB. It's not guaranteed to flush
+ * the TLB, though, so it's not clear that we can do anything
+ * useful with this knowledge.
*/
- if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
- leave_mm(cpu);

if (!(lapic_timer_reliable_states & (1 << (cstate))))
tick_broadcast_enter();