[RFC][PATCH 1/5] mm: Rework {set,clear,mm}_tlb_flush_pending()
From: Peter Zijlstra
Date: Wed Jun 07 2017 - 12:22:15 EST
Commit af2c1401e6f9 ("mm: numa: guarantee that tlb_flush_pending updates
are visible before page table updates") added smp_mb__before_spinlock()
to set_tlb_flush_pending(). I think we can solve the same problem
without this barrier.
If instead we mandate that mm_tlb_flush_pending() is used while
holding the PTL, we're guaranteed to observe prior
set_tlb_flush_pending() instances.
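
To make that explicit, here is a rough two-CPU sketch of the argument
(my illustration, not part of the patch; CPU0 is whichever path sets the
pending bit, e.g. the NUMA protection-change code, and CPU1 is the fault
handler that ends up in do_huge_pmd_numa_page()):

  CPU0                                  CPU1

  set_tlb_flush_pending(mm);
  spin_lock(ptl);
  /* make the PMD !accessible */
  spin_unlock(ptl);
                                        /* NUMA fault on that PMD */
                                        spin_lock(ptl);
                                        /* the lock acquire orders us after
                                           CPU0's unlock, so the pending
                                           store is visible here */
                                        mm_tlb_flush_pending(mm) == true
                                        spin_unlock(ptl);

Even if the pending store leaks into CPU0's critical section, it still
happens before CPU0's unlock, and CPU1's subsequent lock acquire makes
it visible.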
For this to work, we need to rework migrate_misplaced_transhuge_page()
a little and move the test up into do_huge_pmd_numa_page().
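
Condensed, the resulting flow in do_huge_pmd_numa_page() looks roughly
like this (paraphrased from the hunk below, not a verbatim copy):

  spin_lock(vmf->ptl);
  ...
  /* sample the flag while the PTL is still held */
  if (mm_tlb_flush_pending(mm))
          need_flush = true;
  ...
  spin_unlock(vmf->ptl);

  /* the flush itself happens after dropping the PTL */
  if (need_flush)
          flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);

  migrated = migrate_misplaced_transhuge_page(...);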
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Rik van Riel <riel@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -527,18 +527,16 @@ static inline cpumask_t *mm_cpumask(stru
*/
static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
{
- barrier();
+ /*
+ * Must be called with PTL held, such that our PTL acquire will have
+ * observed the store from set_tlb_flush_pending().
+ */
return mm->tlb_flush_pending;
}
static inline void set_tlb_flush_pending(struct mm_struct *mm)
{
mm->tlb_flush_pending = true;
-
- /*
- * Guarantee that the tlb_flush_pending store does not leak into the
- * critical section updating the page tables
- */
- smp_mb__before_spinlock();
+ barrier();
}
/* Clearing is done after a TLB flush, which also provides a barrier. */
static inline void clear_tlb_flush_pending(struct mm_struct *mm)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1410,6 +1410,7 @@ int do_huge_pmd_numa_page(struct vm_faul
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
int page_nid = -1, this_nid = numa_node_id();
int target_nid, last_cpupid = -1;
+ bool need_flush = false;
bool page_locked;
bool migrated = false;
bool was_writable;
@@ -1490,10 +1491,29 @@ int do_huge_pmd_numa_page(struct vm_faul
}
/*
+ * Since we took the NUMA fault, we must have observed the !accessible
+ * bit. Make sure all other CPUs agree with that, to avoid them
+ * modifying the page we're about to migrate.
+ *
+ * Must be done under PTL such that we'll observe the relevant
+ * set_tlb_flush_pending().
+ */
+ if (mm_tlb_flush_pending(mm))
+ need_flush = true;
+
+ /*
* Migrate the THP to the requested node, returns with page unlocked
* and access rights restored.
*/
spin_unlock(vmf->ptl);
+
+ /*
+ * We are not sure a pending tlb flush here is for a huge page
+ * mapping or not. Hence use the tlb range variant
+ */
+ if (need_flush)
+ flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+
migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
vmf->pmd, pmd, vmf->address, page, target_nid);
if (migrated) {
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1935,12 +1935,6 @@ int migrate_misplaced_transhuge_page(str
put_page(new_page);
goto out_fail;
}
- /*
- * We are not sure a pending tlb flush here is for a huge page
- * mapping or not. Hence use the tlb range variant
- */
- if (mm_tlb_flush_pending(mm))
- flush_tlb_range(vma, mmun_start, mmun_end);
/* Prepare a page as a migration target */
__SetPageLocked(new_page);