Re: [PATCH v11 02/14] HMM: add special swap filetype for memory migrated to device v2.

From: Hillf Danton
Date: Thu Oct 22 2015 - 03:59:31 EST


>
> When migrating anonymous memory from system memory to device memory,
> CPU ptes are replaced with a special HMM swap entry so that page fault,
> get user pages (gup), fork, ... are properly redirected to HMM helpers.
>
> This patch only adds the new swap type entry and hooks the HMM helper
> functions into the page fault and fork code paths.
>
> Changed since v1:

But the subject line says this work is v11

> - Fix name of HMM CPU page fault function.
>
> Signed-off-by: Jérôme Glisse <jglisse@...hat.com>
> Signed-off-by: Sherry Cheung <SCheung@...dia.com>
> Signed-off-by: Subhash Gutti <sgutti@...dia.com>
> Signed-off-by: Mark Hairgrove <mhairgrove@...dia.com>
> Signed-off-by: John Hubbard <jhubbard@...dia.com>
> Signed-off-by: Jatin Kumar <jakumar@...dia.com>
> ---
> include/linux/hmm.h | 34 ++++++++++++++++++++++++++++++++++
> include/linux/swap.h | 13 ++++++++++++-
> include/linux/swapops.h | 43 ++++++++++++++++++++++++++++++++++++++++++-
> mm/hmm.c | 21 +++++++++++++++++++++
> mm/memory.c | 22 ++++++++++++++++++++++
> 5 files changed, 131 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 4bc132a..7c66513 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h

I find no hmm.h in 4.3-rc6

> @@ -272,6 +272,40 @@ void hmm_mirror_range_dirty(struct hmm_mirror *mirror,
> unsigned long start,
> unsigned long end);
>
> +int hmm_handle_cpu_fault(struct mm_struct *mm,
> + struct vm_area_struct *vma,
> + pmd_t *pmdp, unsigned long addr,
> + unsigned flags, pte_t orig_pte);
> +
> +int hmm_mm_fork(struct mm_struct *src_mm,
> + struct mm_struct *dst_mm,
> + struct vm_area_struct *dst_vma,
> + pmd_t *dst_pmd,
> + unsigned long start,
> + unsigned long end);
> +
> +#else /* CONFIG_HMM */
> +
> +static inline int hmm_handle_cpu_fault(struct mm_struct *mm,
> + struct vm_area_struct *vma,
> + pmd_t *pmdp, unsigned long addr,
> + unsigned flags, pte_t orig_pte)
> +{
> + return VM_FAULT_SIGBUS;
> +}
> +
> +static inline int hmm_mm_fork(struct mm_struct *src_mm,
> + struct mm_struct *dst_mm,
> + struct vm_area_struct *dst_vma,
> + pmd_t *dst_pmd,
> + unsigned long start,
> + unsigned long end)
> +{
> + BUG();

s/BUG/BUILD_BUG/ ?

> + return -ENOMEM;
> +}
>
> #endif /* CONFIG_HMM */
> +
> +
> #endif
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 7ba7dcc..5c8b871 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -70,8 +70,19 @@ static inline int current_is_kswapd(void)
> #define SWP_HWPOISON_NUM 0
> #endif
>
> +/*
> + * HMM (heterogeneous memory management) used when data is in remote memory.
> + */
> +#ifdef CONFIG_HMM
> +#define SWP_HMM_NUM 1
> +#define SWP_HMM (MAX_SWAPFILES + SWP_MIGRATION_NUM + SWP_HWPOISON_NUM)
> +#else
> +#define SWP_HMM_NUM 0
> +#endif
> +
> #define MAX_SWAPFILES \
> - ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
> + ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - \
> + SWP_HWPOISON_NUM - SWP_HMM_NUM)
>
> /*
> * Magic header for a swap area. The first part of the union is
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index 5c3a5f3..8c6ba9f 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -227,7 +227,7 @@ static inline void num_poisoned_pages_inc(void)
> }
> #endif
>
> -#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION)
> +#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) || defined(CONFIG_HMM)
> static inline int non_swap_entry(swp_entry_t entry)
> {
> return swp_type(entry) >= MAX_SWAPFILES;
> @@ -239,4 +239,45 @@ static inline int non_swap_entry(swp_entry_t entry)
> }
> #endif
>
> +#ifdef CONFIG_HMM
> +static inline swp_entry_t make_hmm_entry(void)
> +{
> + /* We do not store anything inside the CPU page table entry (pte). */

pte is clear enough, no?

> + return swp_entry(SWP_HMM, 0);
> +}
> +
> +static inline swp_entry_t make_hmm_entry_locked(void)
> +{
> + /* We do not store anything inside the CPU page table entry (pte). */
> + return swp_entry(SWP_HMM, 1);
> +}
> +
> +static inline swp_entry_t make_hmm_entry_poisonous(void)
> +{
> + /* We do not store anything inside the CPU page table entry (pte). */
> + return swp_entry(SWP_HMM, 2);
> +}
> +
> +static inline int is_hmm_entry(swp_entry_t entry)
> +{
> + return (swp_type(entry) == SWP_HMM);
> +}
> +
> +static inline int is_hmm_entry_locked(swp_entry_t entry)
> +{
> + return (swp_type(entry) == SWP_HMM) && (swp_offset(entry) == 1);
> +}
> +
> +static inline int is_hmm_entry_poisonous(swp_entry_t entry)
> +{
> + return (swp_type(entry) == SWP_HMM) && (swp_offset(entry) == 2);
> +}

So SWP_HMM_LOCKED and SWP_HMM_POISON should be defined.

> +#else /* CONFIG_HMM */
> +static inline int is_hmm_entry(swp_entry_t swp)
> +{
> + return 0;
> +}
> +#endif /* CONFIG_HMM */
> +
> +
> #endif /* _LINUX_SWAPOPS_H */
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 9e5017a..7fb493f 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -416,6 +416,27 @@ static struct mmu_notifier_ops hmm_notifier_ops = {
> };
>
>
> +int hmm_handle_cpu_fault(struct mm_struct *mm,
> + struct vm_area_struct *vma,
> + pmd_t *pmdp, unsigned long addr,
> + unsigned flags, pte_t orig_pte)
> +{
> + return VM_FAULT_SIGBUS;
> +}
> +EXPORT_SYMBOL(hmm_handle_cpu_fault);
> +
> +int hmm_mm_fork(struct mm_struct *src_mm,
> + struct mm_struct *dst_mm,
> + struct vm_area_struct *dst_vma,
> + pmd_t *dst_pmd,
> + unsigned long start,
> + unsigned long end)
> +{
> + return -ENOMEM;
> +}
> +EXPORT_SYMBOL(hmm_mm_fork);
> +
> +
> struct mm_pt_iter {
> struct mm_struct *mm;
> pte_t *ptep;
> diff --git a/mm/memory.c b/mm/memory.c
> index bbab5e9..08bc37e 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -53,6 +53,7 @@
> #include <linux/writeback.h>
> #include <linux/memcontrol.h>
> #include <linux/mmu_notifier.h>
> +#include <linux/hmm.h>
> #include <linux/kallsyms.h>
> #include <linux/swapops.h>
> #include <linux/elf.h>
> @@ -894,9 +895,11 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
> pte_t *orig_src_pte, *orig_dst_pte;
> pte_t *src_pte, *dst_pte;
> spinlock_t *src_ptl, *dst_ptl;
> + unsigned cnt_hmm_entry = 0;

s/cnt_hmm_entry/hmm_ptes/ ?

> int progress = 0;
> int rss[NR_MM_COUNTERS];
> swp_entry_t entry = (swp_entry_t){0};
> + unsigned long start;
>
> again:
> init_rss_vec(rss);
> @@ -910,6 +913,7 @@ again:
> orig_src_pte = src_pte;
> orig_dst_pte = dst_pte;
> arch_enter_lazy_mmu_mode();
> + start = addr;
>
> do {
> /*
> @@ -926,6 +930,12 @@ again:
> progress++;
> continue;
> }
> + if (unlikely(!pte_present(*src_pte))) {
> + entry = pte_to_swp_entry(*src_pte);
> +
> + if (is_hmm_entry(entry))
> + cnt_hmm_entry++;
> + }
> entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
> vma, addr, rss);
> if (entry.val)
> @@ -940,6 +950,15 @@ again:
> pte_unmap_unlock(orig_dst_pte, dst_ptl);
> cond_resched();
>
> + if (cnt_hmm_entry) {
> + int ret;
> +
> + ret = hmm_mm_fork(src_mm, dst_mm, dst_vma,
> + dst_pmd, start, end);

Given start, s/end/addr/, no?

> + if (ret)
> + return ret;
> + }
> +
> if (entry.val) {
> if (add_swap_count_continuation(entry, GFP_KERNEL) < 0)
> return -ENOMEM;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/