Re: [PATCH 1/6] mm/mmap: Restrict generic protection_map[] array visibility
From: Christophe Leroy
Date: Fri Jun 03 2022 - 08:18:16 EST
Le 03/06/2022 à 12:14, Anshuman Khandual a écrit :
> Restrict generic protection_map[] array visibility only for platforms which
> do not enable ARCH_HAS_VM_GET_PAGE_PROT. For other platforms that do define
> their own vm_get_page_prot() enabling ARCH_HAS_VM_GET_PAGE_PROT, could have
> their private static protection_map[] still implementing an array look up.
> These private protection_map[] array could do without __PXXX/__SXXX macros,
> making them redundant and dropping them off.
>
> But platforms which do not define their custom vm_get_page_prot() enabling
> ARCH_HAS_VM_GET_PAGE_PROT, will still have to provide __PXXX/__SXXX macros.
> Although this now provides a method for other willing platforms to drop off
> __PXXX/__SXX macros completely.
>
> Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
> Cc: Will Deacon <will@xxxxxxxxxx>
> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
> Cc: Paul Mackerras <paulus@xxxxxxxxx>
> Cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: x86@xxxxxxxxxx
> Cc: linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
> Cc: linuxppc-dev@xxxxxxxxxxxxxxxx
> Cc: sparclinux@xxxxxxxxxxxxxxx
> Cc: linux-mm@xxxxxxxxx
> Cc: linux-kernel@xxxxxxxxxxxxxxx
> Signed-off-by: Anshuman Khandual <anshuman.khandual@xxxxxxx>
> ---
> arch/arm64/include/asm/pgtable-prot.h | 18 ------------------
> arch/arm64/mm/mmap.c | 21 +++++++++++++++++++++
> arch/powerpc/include/asm/pgtable.h | 2 ++
> arch/powerpc/mm/book3s64/pgtable.c | 20 ++++++++++++++++++++
> arch/sparc/include/asm/pgtable_32.h | 2 ++
> arch/sparc/include/asm/pgtable_64.h | 19 -------------------
> arch/sparc/mm/init_64.c | 20 ++++++++++++++++++++
> arch/x86/include/asm/pgtable_types.h | 19 -------------------
> arch/x86/mm/pgprot.c | 19 +++++++++++++++++++
> include/linux/mm.h | 2 ++
> mm/mmap.c | 2 +-
> 11 files changed, 87 insertions(+), 57 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
> index d564d0ecd4cd..8ed2a80c896e 100644
> --- a/arch/powerpc/include/asm/pgtable.h
> +++ b/arch/powerpc/include/asm/pgtable.h
> @@ -21,6 +21,7 @@ struct mm_struct;
> #endif /* !CONFIG_PPC_BOOK3S */
>
> /* Note due to the way vm flags are laid out, the bits are XWR */
> +#ifndef CONFIG_ARCH_HAS_VM_GET_PAGE_PROT
Ok, so until now it was common to all powerpc platforms. Now you define
a different way whether it is a PPC_BOOK3S_64 or another platform ?
What's the point ?
> #define __P000 PAGE_NONE
> #define __P001 PAGE_READONLY
> #define __P010 PAGE_COPY
> @@ -38,6 +39,7 @@ struct mm_struct;
> #define __S101 PAGE_READONLY_X
> #define __S110 PAGE_SHARED_X
> #define __S111 PAGE_SHARED_X
> +#endif
>
> #ifndef __ASSEMBLY__
>
> diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
> index 7b9966402b25..2cf10a17c0a9 100644
> --- a/arch/powerpc/mm/book3s64/pgtable.c
> +++ b/arch/powerpc/mm/book3s64/pgtable.c
> @@ -551,6 +551,26 @@ unsigned long memremap_compat_align(void)
> EXPORT_SYMBOL_GPL(memremap_compat_align);
> #endif
>
> +/* Note due to the way vm flags are laid out, the bits are XWR */
> +static pgprot_t protection_map[16] __ro_after_init = {
I don't think powerpc modifies that at all. Could be const instead of
ro_after_init.
> + [VM_NONE] = PAGE_NONE,
> + [VM_READ] = PAGE_READONLY,
> + [VM_WRITE] = PAGE_COPY,
> + [VM_WRITE | VM_READ] = PAGE_COPY,
> + [VM_EXEC] = PAGE_READONLY_X,
> + [VM_EXEC | VM_READ] = PAGE_READONLY_X,
> + [VM_EXEC | VM_WRITE] = PAGE_COPY_X,
> + [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_X,
> + [VM_SHARED] = PAGE_NONE,
> + [VM_SHARED | VM_READ] = PAGE_READONLY,
> + [VM_SHARED | VM_WRITE] = PAGE_SHARED,
> + [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED,
> + [VM_SHARED | VM_EXEC] = PAGE_READONLY_X,
> + [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READONLY_X,
> + [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_X,
> + [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_X
> +};
That's nice but it could apply to all powerpc platforms. Why restrict it
to book3s/64 ?
> +
> pgprot_t vm_get_page_prot(unsigned long vm_flags)
> {
> unsigned long prot = pgprot_val(protection_map[vm_flags &
> diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
> index 4866625da314..bca98b280fdd 100644
> --- a/arch/sparc/include/asm/pgtable_32.h
> +++ b/arch/sparc/include/asm/pgtable_32.h
> @@ -65,6 +65,7 @@ void paging_init(void);
> extern unsigned long ptr_in_current_pgd;
>
> /* xwr */
> +#ifndef CONFIG_ARCH_HAS_VM_GET_PAGE_PROT
CONFIG_ARCH_HAS_VM_GET_PAGE_PROT is selected by sparc64 only, is that
ifdef needed at all ?
> #define __P000 PAGE_NONE
> #define __P001 PAGE_READONLY
> #define __P010 PAGE_COPY
> @@ -82,6 +83,7 @@ extern unsigned long ptr_in_current_pgd;
> #define __S101 PAGE_READONLY
> #define __S110 PAGE_SHARED
> #define __S111 PAGE_SHARED
> +#endif
>
> /* First physical page can be anywhere, the following is needed so that
> * va-->pa and vice versa conversions work properly without performance
> diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
> index 4679e45c8348..a779418ceba9 100644
> --- a/arch/sparc/include/asm/pgtable_64.h
> +++ b/arch/sparc/include/asm/pgtable_64.h
> @@ -187,25 +187,6 @@ bool kern_addr_valid(unsigned long addr);
> #define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U
> #define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V
>
> -/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */
> -#define __P000 __pgprot(0)
> -#define __P001 __pgprot(0)
> -#define __P010 __pgprot(0)
> -#define __P011 __pgprot(0)
> -#define __P100 __pgprot(0)
> -#define __P101 __pgprot(0)
> -#define __P110 __pgprot(0)
> -#define __P111 __pgprot(0)
> -
> -#define __S000 __pgprot(0)
> -#define __S001 __pgprot(0)
> -#define __S010 __pgprot(0)
> -#define __S011 __pgprot(0)
> -#define __S100 __pgprot(0)
> -#define __S101 __pgprot(0)
> -#define __S110 __pgprot(0)
> -#define __S111 __pgprot(0)
> -
> #ifndef __ASSEMBLY__
>
> pte_t mk_pte_io(unsigned long, pgprot_t, int, unsigned long);
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index f6174df2d5af..6edc2a68b73c 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -2634,6 +2634,26 @@ void vmemmap_free(unsigned long start, unsigned long end,
> }
> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>
> +/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */
> +static pgprot_t protection_map[16] __ro_after_init = {
> + [VM_NONE] = __pgprot(0),
> + [VM_READ] = __pgprot(0),
> + [VM_WRITE] = __pgprot(0),
> + [VM_WRITE | VM_READ] = __pgprot(0),
> + [VM_EXEC] = __pgprot(0),
> + [VM_EXEC | VM_READ] = __pgprot(0),
> + [VM_EXEC | VM_WRITE] = __pgprot(0),
> + [VM_EXEC | VM_WRITE | VM_READ] = __pgprot(0),
> + [VM_SHARED] = __pgprot(0),
> + [VM_SHARED | VM_READ] = __pgprot(0),
> + [VM_SHARED | VM_WRITE] = __pgprot(0),
> + [VM_SHARED | VM_WRITE | VM_READ] = __pgprot(0),
> + [VM_SHARED | VM_EXEC] = __pgprot(0),
> + [VM_SHARED | VM_EXEC | VM_READ] = __pgprot(0),
> + [VM_SHARED | VM_EXEC | VM_WRITE] = __pgprot(0),
> + [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = __pgprot(0)
> +};
__pgprot(0) is 0 so you don't need to initialise the fields at all, it
is zeroized at startup as part of BSS section.
> +
> static void prot_init_common(unsigned long page_none,
> unsigned long page_shared,
> unsigned long page_copy,
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index bc8f326be0ce..2254c1980c8e 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -420,11 +420,13 @@ extern unsigned int kobjsize(const void *objp);
> #endif
> #define VM_FLAGS_CLEAR (ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR)
>
> +#ifndef CONFIG_ARCH_HAS_VM_GET_PAGE_PROT
> /*
> * mapping from the currently active vm_flags protection bits (the
> * low four bits) to a page protection mask..
> */
> extern pgprot_t protection_map[16];
> +#endif
>
> /*
> * The default fault flags that should be used by most of the
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 61e6135c54ef..e66920414945 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -101,6 +101,7 @@ static void unmap_region(struct mm_struct *mm,
> * w: (no) no
> * x: (yes) yes
> */
> +#ifndef CONFIG_ARCH_HAS_VM_GET_PAGE_PROT
> pgprot_t protection_map[16] __ro_after_init = {
> [VM_NONE] = __P000,
> [VM_READ] = __P001,
> @@ -120,7 +121,6 @@ pgprot_t protection_map[16] __ro_after_init = {
> [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = __S111
> };
>
> -#ifndef CONFIG_ARCH_HAS_VM_GET_PAGE_PROT
Why not let architectures provide their protection_map[] and keep that
function ?
> pgprot_t vm_get_page_prot(unsigned long vm_flags)
> {
> return protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];