Re: [PATCH v2 10/12] mm: mempolicy: mbind and migrate_pages support thp migration

From: Anshuman Khandual
Date: Fri Nov 25 2016 - 07:27:55 EST


On 11/08/2016 05:01 AM, Naoya Horiguchi wrote:
> This patch enables thp migration for mbind(2) and migrate_pages(2).
>
> Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
> ---
> ChangeLog v1 -> v2:
> - support pte-mapped and doubly-mapped thp
> ---
> mm/mempolicy.c | 108 +++++++++++++++++++++++++++++++++++++++++----------------
> 1 file changed, 79 insertions(+), 29 deletions(-)
>
> diff --git v4.9-rc2-mmotm-2016-10-27-18-27/mm/mempolicy.c v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/mempolicy.c
> index 77d0668..96507ee 100644
> --- v4.9-rc2-mmotm-2016-10-27-18-27/mm/mempolicy.c
> +++ v4.9-rc2-mmotm-2016-10-27-18-27_patched/mm/mempolicy.c
> @@ -94,6 +94,7 @@
> #include <linux/mm_inline.h>
> #include <linux/mmu_notifier.h>
> #include <linux/printk.h>
> +#include <linux/swapops.h>
>
> #include <asm/tlbflush.h>
> #include <asm/uaccess.h>
> @@ -486,6 +487,49 @@ static inline bool queue_pages_node_check(struct page *page,
> return node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT);
> }
>
> +static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
> + unsigned long end, struct mm_walk *walk)
> +{
> + int ret = 0;
> + struct page *page;
> + struct queue_pages *qp = walk->private;
> + unsigned long flags;
> +
> + if (unlikely(is_pmd_migration_entry(*pmd))) {
> + ret = 1;
> + goto unlock;
> + }
> + page = pmd_page(*pmd);
> + if (is_huge_zero_page(page)) {
> + spin_unlock(ptl);
> + __split_huge_pmd(walk->vma, pmd, addr, false, NULL);
> + goto out;
> + }
> + if (!thp_migration_supported()) {
> + get_page(page);
> + spin_unlock(ptl);
> + lock_page(page);
> + ret = split_huge_page(page);
> + unlock_page(page);
> + put_page(page);
> + goto out;
> + }
> + if (queue_pages_node_check(page, qp)) {
> + ret = 1;
> + goto unlock;
> + }
> +
> + ret = 1;
> + flags = qp->flags;
> + /* go to thp migration */
> + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
> + migrate_page_add(page, qp->pagelist, flags);
> +unlock:
> + spin_unlock(ptl);
> +out:
> + return ret;
> +}
> +
> /*
> * Scan through pages checking if pages follow certain conditions,
> * and move them to the pagelist if they do.
> @@ -497,30 +541,15 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
> struct page *page;
> struct queue_pages *qp = walk->private;
> unsigned long flags = qp->flags;
> - int nid, ret;
> + int ret;
> pte_t *pte;
> spinlock_t *ptl;
>
> - if (pmd_trans_huge(*pmd)) {
> - ptl = pmd_lock(walk->mm, pmd);
> - if (pmd_trans_huge(*pmd)) {
> - page = pmd_page(*pmd);
> - if (is_huge_zero_page(page)) {
> - spin_unlock(ptl);
> - __split_huge_pmd(vma, pmd, addr, false, NULL);
> - } else {
> - get_page(page);
> - spin_unlock(ptl);
> - lock_page(page);
> - ret = split_huge_page(page);
> - unlock_page(page);
> - put_page(page);
> - if (ret)
> - return 0;
> - }
> - } else {
> - spin_unlock(ptl);
> - }
> + ptl = pmd_trans_huge_lock(pmd, vma);
> + if (ptl) {
> + ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
> + if (ret)
> + return 0;
> }

I wonder if we should introduce a pte_entry function alongside the pmd_entry
function, since we first look for trans huge PMDs (either to add them directly
to the migration list or to split them) before looking at the PTEs.