[PATCH 6/6] x86/tlb: optimize TLB flushing on munmap

From: Alex Shi
Date: Sat May 05 2012 - 00:04:00 EST

The munmap path already knows which address range it is tearing down,
but that information was dropped before it reached the x86 TLB code:
tlb_flush() called flush_tlb_mm(), which always flushed the whole TLB.

Pass the start/end of the unmapped range through tlb_flush_mmu() and
flush_tlb_mm(), and fold the old flush_tlb_mm()/flush_tlb_range()
bodies into a common __flush_tlb_range() helper.  With the range
available, small unmaps can be handled with INVLPG (bounded by
tlb_flushall_factor) instead of a full TLB flush.  Callers that really
want a full flush now pass (0, -1).
---
arch/x86/include/asm/tlb.h | 2 +-
arch/x86/include/asm/tlbflush.h | 5 ++-
arch/x86/mm/pgtable.c | 2 +-
arch/x86/mm/tlb.c | 74 +++++++++++++++++---------------------
fs/proc/task_mmu.c | 2 +-
include/asm-generic/tlb.h | 4 +-
include/asm-generic/tlbflush.h | 3 +-
kernel/fork.c | 2 +-
mm/memory.c | 11 ++++--
9 files changed, 51 insertions(+), 54 deletions(-)
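
As a quick reference for reviewers, below is a minimal sketch of the new
calling convention.  example_flush() and its arguments are made up for
illustration and are not part of this series; only the tlb_flush_mmu()
and flush_tlb_mm() signatures come from the patch.

/* Illustrative only -- example_flush() is a hypothetical caller. */
static void example_flush(struct mmu_gather *tlb, struct mm_struct *mm,
			  unsigned long start, unsigned long end)
{
	/*
	 * Ranged flush: x86 may turn this into an INVLPG loop when
	 * (end - start)/PAGE_SIZE is small compared with
	 * act_entries/tlb_flushall_factor.
	 */
	tlb_flush_mmu(tlb, start, end);

	/* Full flush: the (0, -1) range replaces the old flush_tlb_mm(mm). */
	flush_tlb_mm(mm, 0, -1);
}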

diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 829215f..505fdfe 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -4,7 +4,7 @@
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+#define tlb_flush(tlb, start, end) flush_tlb_mm((tlb)->mm, start, end)

#include <asm-generic/tlb.h>

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 51f8b1c..d1baeac 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -89,7 +89,8 @@ static inline void __flush_tlb_one(unsigned long addr)
#define flush_tlb_all() __flush_tlb_all()
#define local_flush_tlb() __flush_tlb()

-static inline void flush_tlb_mm(struct mm_struct *mm)
+static inline void flush_tlb_mm(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
if (mm == current->active_mm)
__flush_tlb();
@@ -128,7 +129,7 @@ static inline void reset_lazy_tlbstate(void)

extern void flush_tlb_all(void);
extern void flush_tlb_current_task(void);
-extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_mm(struct mm_struct *, unsigned long, unsigned long);
extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
extern void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8573b83..204221c 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -168,7 +168,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
* section 8.1: in PAE mode we explicitly have to flush the
* TLB via cr3 if the top-level pgd is changed...
*/
- flush_tlb_mm(mm);
+ flush_tlb_mm(mm, 0, -1);
}
#else /* !CONFIG_X86_PAE */

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 91896dc..5f9a327 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -298,22 +298,6 @@ void flush_tlb_current_task(void)
preempt_enable();
}

-void flush_tlb_mm(struct mm_struct *mm)
-{
- preempt_disable();
-
- if (current->active_mm == mm) {
- if (current->mm)
- local_flush_tlb();
- else
- leave_mm(smp_processor_id());
- }
- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL, 0UL);
-
- preempt_enable();
-}
-
static inline int has_large_page(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
@@ -336,39 +320,27 @@ static inline int has_large_page(struct mm_struct *mm,
return 0;
}

-void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag)
{
- struct mm_struct *mm;
-
- if (!cpu_has_invlpg || vma->vm_flags & VM_HUGETLB
- || !tlb_flushall_factor) {
-flush_all:
- flush_tlb_mm(vma->vm_mm);
- return;
- }
-
preempt_disable();
- mm = vma->vm_mm;
if (current->active_mm == mm) {
if (current->mm) {
- unsigned long addr, vmflag = vma->vm_flags;
- unsigned act_entries, tlb_entries = 0;
-
- if (vmflag & VM_EXEC)
- tlb_entries = tlb_lli_4k[ENTRIES];
- else
- tlb_entries = tlb_lld_4k[ENTRIES];
-
- act_entries = tlb_entries > mm->total_vm ?
- mm->total_vm : tlb_entries;
-
- if ((end - start)/PAGE_SIZE >
+ unsigned long addr;
+ unsigned long act_entries, tlb_entries = 0;
+
+ if (vmflag & VM_EXEC)
+ tlb_entries = tlb_lli_4k[ENTRIES];
+ else
+ tlb_entries = tlb_lld_4k[ENTRIES];
+ act_entries = min(mm->total_vm, tlb_entries);
+
+ if (start == 0 || (end - start)/PAGE_SIZE >
act_entries/tlb_flushall_factor)
local_flush_tlb();
else {
if (has_large_page(mm, start, end)) {
- preempt_enable();
+ local_flush_tlb();
goto flush_all;
}
for (addr = start; addr <= end;
@@ -386,11 +358,31 @@ flush_all:
leave_mm(smp_processor_id());
}
}
+
+flush_all:
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL, 0UL);
preempt_enable();
}

+void flush_tlb_mm(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ __flush_tlb_range(mm, start, end, 0UL);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long vmflag = vma->vm_flags;
+
+ if (!cpu_has_invlpg || vma->vm_flags & VM_HUGETLB
+ || !tlb_flushall_factor)
+ __flush_tlb_range(mm, 0UL, -1UL, 0);
+ else
+ __flush_tlb_range(mm, start, end, vmflag);
+}
+

void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
{
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2d60492..b2c9659 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -660,7 +660,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
walk_page_range(vma->vm_start, vma->vm_end,
&clear_refs_walk);
}
- flush_tlb_mm(mm);
+ flush_tlb_mm(mm, 0, -1);
up_read(&mm->mmap_sem);
mmput(mm);
}
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index f96a5b5..24e205d 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -112,7 +112,7 @@ static inline int tlb_fast_mode(struct mmu_gather *tlb)
}

void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm);
-void tlb_flush_mmu(struct mmu_gather *tlb);
+void tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end);
void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end);
int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);

@@ -123,7 +123,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
{
if (!__tlb_remove_page(tlb, page))
- tlb_flush_mmu(tlb);
+ tlb_flush_mmu(tlb, 0UL, -1UL);
}

/**
diff --git a/include/asm-generic/tlbflush.h b/include/asm-generic/tlbflush.h
index d6d0a88..db1d4bb 100644
--- a/include/asm-generic/tlbflush.h
+++ b/include/asm-generic/tlbflush.h
@@ -11,7 +11,8 @@

#include <linux/bug.h>

-static inline void flush_tlb_mm(struct mm_struct *mm)
+static inline void flush_tlb_mm(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
{
BUG();
}
diff --git a/kernel/fork.c b/kernel/fork.c
index b9372a0..0e895e8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -427,7 +427,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
retval = 0;
out:
up_write(&mm->mmap_sem);
- flush_tlb_mm(oldmm);
+ flush_tlb_mm(oldmm, 0, -1);
up_write(&oldmm->mmap_sem);
return retval;
fail_nomem_anon_vma_fork:
diff --git a/mm/memory.c b/mm/memory.c
index 6105f47..c25c9ea 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -218,14 +218,15 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
#endif
}

-void tlb_flush_mmu(struct mmu_gather *tlb)
+void tlb_flush_mmu(struct mmu_gather *tlb,
+ unsigned long start, unsigned long end)
{
struct mmu_gather_batch *batch;

if (!tlb->need_flush)
return;
tlb->need_flush = 0;
- tlb_flush(tlb);
+ tlb_flush(tlb, start, end);
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
tlb_table_flush(tlb);
#endif
@@ -248,7 +249,7 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long e
{
struct mmu_gather_batch *batch, *next;

- tlb_flush_mmu(tlb);
+ tlb_flush_mmu(tlb, start, end);

/* keep the page table cache within bounds */
check_pgt_cache();
@@ -396,6 +397,8 @@ void pmd_clear_bad(pmd_t *pmd)
* Note: this doesn't free the actual pages themselves. That
* has been handled earlier when unmapping all the memory regions.
*/
+/* pte_free_tlb -> tlb_remove_page -> tlb_flush_mmu: this chain may
+ * cause excessive TLB flushing. */
static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long addr)
{
@@ -1204,7 +1207,7 @@ again:
*/
if (force_flush) {
force_flush = 0;
- tlb_flush_mmu(tlb);
+ tlb_flush_mmu(tlb, addr, end);
if (addr != end)
goto again;
}
--
1.7.5.4
