[PATCH] mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified

From: Yang Shi
Date: Tue Mar 19 2019 - 14:36:10 EST


When MPOL_MF_STRICT was specified and an existing page was already
on a node that does not follow the policy, mbind() should return -EIO.
But commit 6f4576e3687b ("mempolicy: apply page table walker on
queue_pages_range()") broke the rule.

And commit c8633798497c ("mm: mempolicy: mbind and migrate_pages
support thp migration") didn't return the correct value for THP mbind()
either.

If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure the walk
reaches queue_pages_pte_range() or queue_pages_pmd() to check whether an
existing page is already on a node that does not follow the policy. Also,
since a non-migratable vma may now be walked, return -EIO for it as well
if MPOL_MF_MOVE or MPOL_MF_MOVE_ALL was specified.

Tested with https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbind/mbind02.c

Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()")
Reported-by: Cyril Hrubis <chrubis@xxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: stable@xxxxxxxxxxxxxxx
Suggested-by: Kirill A. Shutemov <kirill@xxxxxxxxxxxxx>
Signed-off-by: Yang Shi <yang.shi@xxxxxxxxxxxxxxxxx>
Signed-off-by: Oscar Salvador <osalvador@xxxxxxx>
---
mm/mempolicy.c | 40 +++++++++++++++++++++++++++++++++-------
1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index abe7a67..401c817 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -447,6 +447,13 @@ static inline bool queue_pages_required(struct page *page,
return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT);
}

+/*
+ * queue_pages_pmd() has three possible return values:
+ * 1 - pages are placed on the right node or queued successfully.
+ * 0 - THP was split.
+ * -EIO - the pmd is a migration entry, or MPOL_MF_STRICT was specified and
+ * an existing page was already on a node that does not follow the policy.
+ */
static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
@@ -456,7 +463,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
unsigned long flags;

if (unlikely(is_pmd_migration_entry(*pmd))) {
- ret = 1;
+ ret = -EIO;
goto unlock;
}
page = pmd_page(*pmd);
@@ -473,8 +480,15 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
ret = 1;
flags = qp->flags;
/* go to thp migration */
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ if (!vma_migratable(walk->vma)) {
+ ret = -EIO;
+ goto unlock;
+ }
+
migrate_page_add(page, qp->pagelist, flags);
+ } else
+ ret = -EIO;
unlock:
spin_unlock(ptl);
out:
@@ -499,8 +513,10 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
- if (ret)
+ if (ret > 0)
return 0;
+ else if (ret < 0)
+ return ret;
}

if (pmd_trans_unstable(pmd))
@@ -521,11 +537,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
continue;
if (!queue_pages_required(page, qp))
continue;
- migrate_page_add(page, qp->pagelist, flags);
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+ if (!vma_migratable(vma))
+ break;
+ migrate_page_add(page, qp->pagelist, flags);
+ } else
+ break;
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
- return 0;
+ return addr != end ? -EIO : 0;
}

static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
@@ -595,7 +616,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
unsigned long endvma = vma->vm_end;
unsigned long flags = qp->flags;

- if (!vma_migratable(vma))
+ /*
+ * MPOL_MF_STRICT must still be checked so that -EIO can be
+ * returned where appropriate, regardless of vma_migratable().
+ */
+ if (!vma_migratable(vma) &&
+ !(flags & MPOL_MF_STRICT))
return 1;

if (endvma > end)
@@ -622,7 +648,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
}

/* queue pages from current vma */
- if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ if (flags & MPOL_MF_VALID)
return 0;
return 1;
}
--
1.8.3.1