[PATCH] khugepaged: simplify khugepaged vs. __mmput

From: Michal Hocko
Date: Thu Jun 02 2016 - 04:38:37 EST


__khugepaged_exit is called during the final __mmput and it employs a
complex synchronization dance to make sure it doesn't race with
khugepaged, which might be scanning this mm at the same time. This is
all caused by the fact that khugepaged doesn't pin mm_users. Things
simplify considerably if khugepaged_scan_mm_slot checks the mm and
takes a reference only if mm_users is still non-zero: if mm_users is
already 0 then the mm is dead, so we can unhash the mm_slot and move
on to another one. This also guarantees that __khugepaged_exit cannot
race with khugepaged, and so the exit path is free to release the
slot if it is still hashed. While the scan holds its reference,
khugepaged_test_exit has to treat mm_users <= 1 as exiting, because
the only remaining pin is the one taken by khugepaged_scan_mm_slot
itself.
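For illustration, the pinning pattern the scan side relies on boils
down to an atomic increment-if-not-zero on mm_users. Below is a
minimal userspace sketch (stand-in types and a hypothetical
try_pin_mm helper, not the kernel implementation) of what
atomic_inc_not_zero gives us:

	#include <stdatomic.h>
	#include <stdbool.h>

	struct mm_struct { atomic_int mm_users; };

	/* analogous to atomic_inc_not_zero(&mm->mm_users) */
	static bool try_pin_mm(struct mm_struct *mm)
	{
		int users = atomic_load(&mm->mm_users);

		/* retry the CAS until we either pin or observe zero */
		while (users != 0) {
			if (atomic_compare_exchange_weak(&mm->mm_users,
							 &users, users + 1))
				return true;	/* pinned: final __mmput cannot complete */
		}
		return false;			/* already dead: just unhash the slot */
	}

If try_pin_mm fails, the mm has already entered the final __mmput and
the slot can be collected right away; if it succeeds, the matching
mmput after the scan is what allows __mmput to finish.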

Signed-off-by: Michal Hocko <mhocko@xxxxxxxx>
---
mm/huge_memory.c | 40 ++++++++++++++++------------------------
1 file changed, 16 insertions(+), 24 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index de62bd991827..3dfc62b1a90c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1936,7 +1936,8 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,

static inline int khugepaged_test_exit(struct mm_struct *mm)
{
- return atomic_read(&mm->mm_users) == 0;
+ /* the only pin is from khugepaged_scan_mm_slot */
+ return atomic_read(&mm->mm_users) <= 1;
}

int __khugepaged_enter(struct mm_struct *mm)
@@ -1948,8 +1949,6 @@ int __khugepaged_enter(struct mm_struct *mm)
if (!mm_slot)
return -ENOMEM;

- /* __khugepaged_exit() must not run from under us */
- VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
free_mm_slot(mm_slot);
return 0;
@@ -1999,29 +1998,11 @@ void __khugepaged_exit(struct mm_struct *mm)

spin_lock(&khugepaged_mm_lock);
mm_slot = get_mm_slot(mm);
- if (mm_slot && khugepaged_scan.mm_slot != mm_slot) {
- hash_del(&mm_slot->hash);
- list_del(&mm_slot->mm_node);
- free = 1;
- }
- spin_unlock(&khugepaged_mm_lock);
-
- if (free) {
+ if (mm_slot) {
+ collect_mm_slot(mm_slot);
clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
- free_mm_slot(mm_slot);
- mmdrop(mm);
- } else if (mm_slot) {
- /*
- * This is required to serialize against
- * khugepaged_test_exit() (which is guaranteed to run
- * under mmap sem read mode). Stop here (after we
- * return all pagetables will be destroyed) until
- * khugepaged has finished working on the pagetables
- * under the mmap_sem.
- */
- down_write(&mm->mmap_sem);
- up_write(&mm->mmap_sem);
}
+ spin_unlock(&khugepaged_mm_lock);
}

static void release_pte_page(struct page *page)
@@ -2780,6 +2761,16 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
khugepaged_scan.address = 0;
khugepaged_scan.mm_slot = mm_slot;
}
+
+ /*
+ * Do not even try to do anything if the current mm is already
+ * dead. khugepaged_mm_lock will make sure only this path or
+ * __khugepaged_exit does the unhashing.
+ */
+ if (!atomic_inc_not_zero(&mm_slot->mm->mm_users)) {
+ collect_mm_slot(mm_slot);
+ return progress;
+ }
spin_unlock(&khugepaged_mm_lock);

mm = mm_slot->mm;
@@ -2863,6 +2854,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,

collect_mm_slot(mm_slot);
}
+ mmput(mm);

return progress;
}
--
2.8.1

--
Michal Hocko
SUSE Labs