Re: [PATCH v8 01/23] mm: Introduce PTE_MARKER swap entry
From: Alistair Popple
Date: Mon Apr 11 2022 - 21:21:58 EST
Hi Peter,
I noticed this while reviewing the next patch in the series. I think you need to
add CONFIG_PTE_MARKER to the #if condition guarding non_swap_entry() below as well:
/*
 * non_swap_entry() gets a real implementation only when at least one of the
 * config options tested below is enabled; otherwise it is stubbed to return 0.
 * NOTE(review): CONFIG_PTE_MARKER is not part of this condition, which is the
 * gap this message points out.
 */
#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) || \
defined(CONFIG_DEVICE_PRIVATE)
static inline int non_swap_entry(swp_entry_t entry)
{
/* Non-zero when the entry's type is at or above MAX_SWAPFILES. */
return swp_type(entry) >= MAX_SWAPFILES;
}
#else
static inline int non_swap_entry(swp_entry_t entry)
{
/* None of the options above is enabled: no entry is reported as non-swap. */
return 0;
}
#endif
Otherwise marker entries will be treated as swap entries, which is wrong, for
example, in swapin_walk_pmd_entry(), because marker entries are no longer
considered pte_none().
- Alistair
Peter Xu <peterx@xxxxxxxxxx> writes:
> This patch introduces a new swap entry type called PTE_MARKER. It can be
> installed for any pte that maps a file-backed memory when the pte is
> temporarily zapped, so as to maintain per-pte information.
>
> The information that is kept in the pte is called a "marker". Here we define the
> marker as "unsigned long" just to match pgoff_t, however it will only work if
> it still fits in swp_offset(), which is e.g. currently 58 bits on x86_64.
>
> A new config CONFIG_PTE_MARKER is introduced too; it's by default off. A bunch
> of helpers are defined altogether to service the rest of the pte marker code.
>
> Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
> ---
> include/asm-generic/hugetlb.h | 9 ++++
> include/linux/swap.h | 15 ++++++-
> include/linux/swapops.h | 78 +++++++++++++++++++++++++++++++++++
> mm/Kconfig | 6 +++
> 4 files changed, 107 insertions(+), 1 deletion(-)
>
> diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
> index 8e1e6244a89d..f39cad20ffc6 100644
> --- a/include/asm-generic/hugetlb.h
> +++ b/include/asm-generic/hugetlb.h
> @@ -2,6 +2,9 @@
> #ifndef _ASM_GENERIC_HUGETLB_H
> #define _ASM_GENERIC_HUGETLB_H
>
> +#include <linux/swap.h>
> +#include <linux/swapops.h>
> +
> static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
> {
> return mk_pte(page, pgprot);
> @@ -80,6 +83,12 @@ static inline int huge_pte_none(pte_t pte)
> }
> #endif
>
> +/* Please refer to comments above pte_none_mostly() for the usage */
> +static inline int huge_pte_none_mostly(pte_t pte)
> +{
> + return huge_pte_none(pte) || is_pte_marker(pte);
> +}
> +
> #ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT
> static inline pte_t huge_pte_wrprotect(pte_t pte)
> {
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 7daae5a4b3e1..5553189d0215 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -55,6 +55,19 @@ static inline int current_is_kswapd(void)
> * actions on faults.
> */
>
> +/*
> + * PTE markers are used to persist information onto PTEs that are mapped with
> + * file-backed memories. As its name "PTE" hints, it should only be applied to
> + * the leaves of pgtables.
> + */
> +#ifdef CONFIG_PTE_MARKER
> +#define SWP_PTE_MARKER_NUM 1
> +#define SWP_PTE_MARKER (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
> + SWP_MIGRATION_NUM + SWP_DEVICE_NUM)
> +#else
> +#define SWP_PTE_MARKER_NUM 0
> +#endif
> +
> /*
> * Unaddressable device memory support. See include/linux/hmm.h and
> * Documentation/vm/hmm.rst. Short description is we need struct pages for
> @@ -107,7 +120,7 @@ static inline int current_is_kswapd(void)
>
> #define MAX_SWAPFILES \
> ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
> - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
> + SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - SWP_PTE_MARKER_NUM)
>
> /*
> * Magic header for a swap area. The first part of the union is
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index 32d517a28969..7a00627845f0 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -274,6 +274,84 @@ static inline int is_readable_migration_entry(swp_entry_t entry)
>
> #endif
>
> +typedef unsigned long pte_marker;
> +
> +#define PTE_MARKER_MASK (0)
> +
> +#ifdef CONFIG_PTE_MARKER
> +
> +static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
> +{
> + return swp_entry(SWP_PTE_MARKER, marker);
> +}
> +
> +static inline bool is_pte_marker_entry(swp_entry_t entry)
> +{
> + return swp_type(entry) == SWP_PTE_MARKER;
> +}
> +
> +static inline pte_marker pte_marker_get(swp_entry_t entry)
> +{
> + return swp_offset(entry) & PTE_MARKER_MASK;
> +}
> +
> +static inline bool is_pte_marker(pte_t pte)
> +{
> + return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
> +}
> +
> +#else /* CONFIG_PTE_MARKER */
> +
> +static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
> +{
> + /* This should never be called if !CONFIG_PTE_MARKER */
> + WARN_ON_ONCE(1);
> + return swp_entry(0, 0);
> +}
> +
> +static inline bool is_pte_marker_entry(swp_entry_t entry)
> +{
> + return false;
> +}
> +
> +static inline pte_marker pte_marker_get(swp_entry_t entry)
> +{
> + return 0;
> +}
> +
> +static inline bool is_pte_marker(pte_t pte)
> +{
> + return false;
> +}
> +
> +#endif /* CONFIG_PTE_MARKER */
> +
> +static inline pte_t make_pte_marker(pte_marker marker)
> +{
> + return swp_entry_to_pte(make_pte_marker_entry(marker));
> +}
> +
> +/*
> + * This is a special version to check pte_none() just to cover the case when
> + * the pte is a pte marker. It exists because in many cases the pte marker
> + * should be seen as a none pte; it's just that we have stored some information
> + * onto the none pte so it becomes not-none any more.
> + *
> + * It should be used when the pte is file-backed, ram-based and backing
> + * userspace pages, like shmem. It is not needed upon pgtables that do not
> + * support pte markers at all. For example, it's not needed on anonymous
> + * memory, kernel-only memory (including while the system is booting),
> + * non-ram based generic file-system. It's fine to be used even there, but the
> + * extra pte marker check will be pure overhead.
> + *
> + * For systems configured with !CONFIG_PTE_MARKER this will be automatically
> + * optimized to pte_none().
> + */
> +static inline int pte_none_mostly(pte_t pte)
> +{
> + return pte_none(pte) || is_pte_marker(pte);
> +}
> +
> static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
> {
> struct page *p = pfn_to_page(swp_offset(entry));
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 034d87953600..a1688b9314b2 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -909,6 +909,12 @@ config ANON_VMA_NAME
> area from being merged with adjacent virtual memory areas due to the
> difference in their name.
>
> +config PTE_MARKER
> + bool "Marker PTEs support"
> +
> + help
> + Allows creating marker PTEs for file-backed memory.
> +
> source "mm/damon/Kconfig"
>
> endmenu