[RFC][PATCH 06/15] mm: Provide generic range tracking and flushing

From: Peter Zijlstra
Date: Mon Mar 07 2011 - 12:50:02 EST


In order to convert various architectures to the generic mmu_gather code
we need some extra infrastructure to track the virtual address range
covered by the unmapped pages and torn-down page tables.

There are two mmu_gather cases to consider:

unmap_region()
  tlb_gather_mmu()
  unmap_vmas()
    for (; vma; vma = vma->vm_next)
      unmap_page_range()
        tlb_start_vma() -> flush cache range/track vm_flags
        zap_*_range()
          arch_enter_lazy_mmu_mode()
          ptep_get_and_clear_full() -> batch/track external tlbs
          tlb_remove_tlb_entry() -> track range/external tlbs
          tlb_remove_page() -> batch page
          arch_leave_lazy_mmu_mode() -> flush external tlbs
        tlb_end_vma()
  free_pgtables()
    while (vma)
      unlink_*_vma()
      free_*_range()
        *_free_tlb() -> track range/batch page
  tlb_finish_mmu() -> flush TLBs and flush everything
free vmas

and:

shift_arg_pages()
  tlb_gather_mmu()
  free_*_range()
    *_free_tlb() -> track tlb range
  tlb_finish_mmu() -> flush things
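
For reference, the second case boils down to roughly the following driver
code: a sketch modelled on fs/exec.c:shift_arg_pages(), with mm, vma,
new_end and old_end assumed from that context.

	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, mm, 0);	/* fullmm == 0: partial teardown */
	/* free the page tables left unused after moving the stack */
	free_pgd_range(&tlb, new_end, old_end, new_end, vma->vm_end);
	tlb_finish_mmu(&tlb, new_end, old_end);	/* flush, then free pages */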

There are various reasons we need to flush TLBs _after_ tearing down the
page tables themselves. For some architectures (x86 among others) this
serializes against (both hardware and software) page-table walkers like
gup_fast().
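
To make the x86 case concrete: gup_fast() walks the page tables locklessly
with interrupts disabled, and an IPI-based TLB flush cannot complete while
any CPU still has interrupts off. A heavily condensed sketch of that
exclusion (illustrative only, after the arch/x86/mm/gup.c scheme; not part
of this patch):

	local_irq_disable();	/* an IPI-based TLB flush must now wait */
	/* lockless pgd -> pud -> pmd -> pte walk, get_page() on success */
	local_irq_enable();	/* only now can the flush IPI, and hence the
				 * page-table free behind it, make progress */

Flushing after the teardown thus guarantees no such walker can still be
dereferencing freed page-table pages.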

For others (ARM) this is (also) needed to evict stale page-table caches:
ARM's LPAE mode apparently caches page tables, and concurrent hardware
walkers could re-populate those caches if the final TLB flush were issued
from tlb_end_vma(), since a concurrent walk could still be in progress.

So implement generic range tracking across both the clearing of PTEs and
the tearing down of page tables.
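
The range accumulation itself is just a running min/max, seeded so the
first update always wins (see tlb_init_range()/tlb_track_range() below).
A stand-alone sketch of the arithmetic, with all names invented for
illustration:

	#include <stdio.h>

	struct range { unsigned long start, end; };

	/* like tlb_init_range(): start high, end low -> an empty range */
	static void range_init(struct range *r, unsigned long top)
	{
		r->start = top;		/* stands in for TASK_SIZE */
		r->end = 0;
	}

	/* like tlb_track_range(): grow the range to cover [addr, end) */
	static void range_track(struct range *r, unsigned long addr,
				unsigned long end)
	{
		if (addr < r->start)
			r->start = addr;
		if (end > r->end)
			r->end = end;
	}

	int main(void)
	{
		struct range r;

		range_init(&r, ~0UL);
		range_track(&r, 0x2000, 0x3000);
		range_track(&r, 0x1000, 0x2000);
		printf("flush [%#lx, %#lx)\n", r.start, r.end); /* [0x1000, 0x3000) */
		return 0;
	}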

Cc: Russell King <rmk@xxxxxxxxxxxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: Paul Mundt <lethal@xxxxxxxxxxxx>
Cc: Jeff Dike <jdike@xxxxxxxxxxx>
Cc: Hans-Christian Egtvedt <hans-christian.egtvedt@xxxxxxxxx>
Cc: Ralf Baechle <ralf@xxxxxxxxxxxxxx>
Cc: Kyle McMartin <kyle@xxxxxxxxxxx>
Cc: James Bottomley <jejb@xxxxxxxxxxxxxxxx>
Cc: David Miller <davem@xxxxxxxxxxxxx>
Cc: Chris Zankel <chris@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
---
arch/Kconfig | 3 +
include/asm-generic/tlb.h | 122 ++++++++++++++++++++++++++++++++++++++--------
2 files changed, 105 insertions(+), 20 deletions(-)
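
Note: for an architecture to opt in (illustrative; no architecture is
converted by this patch) it would add

	select HAVE_MMU_GATHER_RANGE

to its Kconfig entry and provide a flush_tlb_range() that copes with the
fake on-stack VMA constructed by the generic tlb_flush() below, i.e. one
that only dereferences vm_mm and vm_flags.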

Index: linux-2.6/arch/Kconfig
===================================================================
--- linux-2.6.orig/arch/Kconfig
+++ linux-2.6/arch/Kconfig
@@ -187,4 +187,7 @@ config ARCH_HAVE_NMI_SAFE_CMPXCHG
config HAVE_RCU_TABLE_FREE
bool

+config HAVE_MMU_GATHER_RANGE
+ bool
+
source "kernel/gcov/Kconfig"
Index: linux-2.6/include/asm-generic/tlb.h
===================================================================
--- linux-2.6.orig/include/asm-generic/tlb.h
+++ linux-2.6/include/asm-generic/tlb.h
@@ -78,7 +78,8 @@ struct mmu_gather_batch {
#define MAX_GATHER_BATCH \
((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))

-/* struct mmu_gather is an opaque type used by the mm code for passing around
+/*
+ * struct mmu_gather is an opaque type used by the mm code for passing around
* any data needed by arch specific code for tlb_remove_page.
*/
struct mmu_gather {
@@ -86,6 +87,10 @@ struct mmu_gather {
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
struct mmu_table_batch *batch;
#endif
+#ifdef CONFIG_HAVE_MMU_GATHER_RANGE
+ unsigned long start, end;
+ unsigned long vm_flags;
+#endif
unsigned int need_flush : 1, /* Did free PTEs */
fast_mode : 1; /* No batching */

@@ -106,6 +111,75 @@ struct mmu_gather {
#define tlb_fast_mode(tlb) 1
#endif

+#ifdef CONFIG_HAVE_MMU_GATHER_RANGE
+
+static inline void tlb_init_range(struct mmu_gather *tlb)
+{
+ tlb->start = TASK_SIZE;
+ tlb->end = 0;
+ tlb->vm_flags = 0;
+}
+
+static inline void
+tlb_track_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end)
+{
+ if (!tlb->fullmm) {
+ tlb->start = min(tlb->start, addr);
+ tlb->end = max(tlb->end, end);
+ }
+}
+
+static inline void
+tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+ if (!tlb->fullmm) {
+ flush_cache_range(vma, vma->vm_start, vma->vm_end);
+ tlb->vm_flags |= vma->vm_flags;
+ }
+}
+
+static inline void
+tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+}
+
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+ /*
+ * Fake VMA, some architectures use VM_EXEC to flush I-TLB/I$,
+ * and some use VM_HUGETLB since they have separate HPAGE TLBs.
+ *
+ * Since it's an artificial VMA, VM_HUGETLB means only part of
+ * the range can be HUGE, so you always have to flush normal
+ * TLBs.
+ */
+ struct vm_area_struct vma = {
+ .vm_mm = tlb->mm,
+ .vm_flags = tlb->vm_flags & (VM_EXEC | VM_HUGETLB),
+ };
+
+ flush_tlb_range(&vma, tlb->start, tlb->end);
+ tlb_init_range(tlb);
+}
+
+#else /* CONFIG_HAVE_MMU_GATHER_RANGE */
+
+static inline void tlb_init_range(struct mmu_gather *tlb)
+{
+}
+
+/*
+ * Macro avoids argument evaluation.
+ */
+#define tlb_track_range(tlb, addr, end) do { } while (0)
+
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+ flush_tlb_mm(tlb->mm);
+}
+
+#endif /* CONFIG_HAVE_MMU_GATHER_RANGE */
+
static inline int tlb_next_batch(struct mmu_gather *tlb)
{
struct mmu_gather_batch *batch;
@@ -146,6 +220,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, s
tlb->local.max = ARRAY_SIZE(tlb->__pages);
tlb->active = &tlb->local;

+ tlb_init_range(tlb);
+
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
tlb->batch = NULL;
#endif
@@ -163,7 +239,7 @@ tlb_flush_mmu(struct mmu_gather *tlb)

if (!tlb->fullmm && tlb->need_flush) {
tlb->need_flush = 0;
- flush_tlb_mm(tlb->mm);
+ tlb_flush(tlb);
}

#ifdef CONFIG_HAVE_RCU_TABLE_FREE
@@ -240,32 +316,38 @@ static inline void tlb_remove_page(struc
* later optimise away the tlb invalidate. This helps when userspace is
* unmapping already-unmapped pages, which happens quite a lot.
*/
-#define tlb_remove_tlb_entry(tlb, ptep, address) \
- do { \
- tlb->need_flush = 1; \
- __tlb_remove_tlb_entry(tlb, ptep, address); \
+#define tlb_remove_tlb_entry(tlb, ptep, addr) \
+ do { \
+ tlb->need_flush = 1; \
+ tlb_track_range(tlb, addr, addr + PAGE_SIZE); \
+ __tlb_remove_tlb_entry(tlb, ptep, addr); \
} while (0)

-#define pte_free_tlb(tlb, ptep, address) \
- do { \
- tlb->need_flush = 1; \
- __pte_free_tlb(tlb, ptep, address); \
+#define pte_free_tlb(tlb, ptep, addr) \
+ do { \
+ tlb->need_flush = 1; \
+ tlb_track_range(tlb, addr, pmd_addr_end(addr, TASK_SIZE));\
+ __pte_free_tlb(tlb, ptep, addr); \
} while (0)

-#ifndef __ARCH_HAS_4LEVEL_HACK
-#define pud_free_tlb(tlb, pudp, address) \
- do { \
- tlb->need_flush = 1; \
- __pud_free_tlb(tlb, pudp, address); \
+#define pmd_free_tlb(tlb, pmdp, addr) \
+ do { \
+ tlb->need_flush = 1; \
+ tlb_track_range(tlb, addr, pud_addr_end(addr, TASK_SIZE));\
+ __pmd_free_tlb(tlb, pmdp, addr); \
} while (0)
-#endif

-#define pmd_free_tlb(tlb, pmdp, address) \
- do { \
- tlb->need_flush = 1; \
- __pmd_free_tlb(tlb, pmdp, address); \
+#ifndef __ARCH_HAS_4LEVEL_HACK
+#define pud_free_tlb(tlb, pudp, addr) \
+ do { \
+ tlb->need_flush = 1; \
+ tlb_track_range(tlb, addr, pgd_addr_end(addr, TASK_SIZE));\
+ __pud_free_tlb(tlb, pudp, addr); \
} while (0)
+#endif

+#ifndef tlb_migrate_finish
#define tlb_migrate_finish(mm) do {} while (0)
+#endif

#endif /* _ASM_GENERIC__TLB_H */

