Re: [PATCH] mm/mmap: Define index macros for protection_map[]

From: Christoph Hellwig
Date: Mon Sep 27 2021 - 11:00:08 EST


On Mon, Sep 27, 2021 at 08:52:00AM +0530, Anshuman Khandual wrote:
> protection_map[] maps the lower four bits from vm_flags into platform page
> protection mask. Default initialization (and possible re-initialization in
> the platform) does not make it clear that these indices are just derived
> from various vm_flags protections (VM_SHARED, VM_READ, VM_WRITE, VM_EXEC).
> This defines macros for protection_map[] indices which concatenate various
> vm_flag attributes, making it clear and explicit.

I dont think this is all that helpful. The main issue here is that
protection_map is a pointless obsfucation ad should be replaced with a
simple switch statement provided by each architecture. See the below
WIP which just works for x86 and without pagetable debugging for where I
think we should be going.

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ab83c22d274e7..70d8ae60a416f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -74,8 +74,8 @@ config X86
select ARCH_HAS_EARLY_DEBUG if KGDB
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
- select ARCH_HAS_FILTER_PGPROT
select ARCH_HAS_FORTIFY_SOURCE
+ select ARCH_HAS_GET_PAGE_PROT
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64 && STACK_VALIDATION
select ARCH_HAS_MEM_ENCRYPT
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 448cd01eb3ecb..a0cc70b056385 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -646,11 +646,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)

#define canon_pgprot(p) __pgprot(massage_pgprot(p))

-static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
-{
- return canon_pgprot(prot);
-}
-
static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
enum page_cache_mode pcm,
enum page_cache_mode new_pcm)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 40497a9020c6e..1a9dd933088e6 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -228,25 +228,6 @@ enum page_cache_mode {

#endif /* __ASSEMBLY__ */

-/* xwr */
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_EXEC
-#define __P101 PAGE_READONLY_EXEC
-#define __P110 PAGE_COPY_EXEC
-#define __P111 PAGE_COPY_EXEC
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_EXEC
-#define __S101 PAGE_READONLY_EXEC
-#define __S110 PAGE_SHARED_EXEC
-#define __S111 PAGE_SHARED_EXEC
-
/*
* early identity mapping pte attrib macros.
*/
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index d4a8d0424bfbf..775dbd3aff736 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -5,20 +5,6 @@
#define MAP_32BIT 0x40 /* only give out 32bit addresses */

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-/*
- * Take the 4 protection key bits out of the vma->vm_flags
- * value and turn them in to the bits that we can put in
- * to a pte.
- *
- * Only override these if Protection Keys are available
- * (which is only on 64-bit).
- */
-#define arch_vm_get_page_prot(vm_flags) __pgprot( \
- ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \
- ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \
- ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \
- ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0))
-
#define arch_calc_vm_prot_bits(prot, key) ( \
((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \
((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 5864219221ca8..b44806c5b3de8 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -16,8 +16,10 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg
CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif

-obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
- pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
+obj-y := init.o init_$(BITS).o fault.o ioremap.o \
+ extable.o mmap.o pgtable.o physaddr.o \
+ setup_nx.o tlb.o cpu_entry_area.o \
+ maccess.o pgprot.o

obj-y += pat/

diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index ff08dc4636347..e1d1168ed25e8 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -189,10 +189,6 @@ void __init sme_early_init(void)

__supported_pte_mask = __sme_set(__supported_pte_mask);

- /* Update the protection map with memory encryption mask */
- for (i = 0; i < ARRAY_SIZE(protection_map); i++)
- protection_map[i] = pgprot_encrypted(protection_map[i]);
-
if (sev_active())
swiotlb_force = SWIOTLB_FORCE;
}
diff --git a/arch/x86/mm/pgprot.c b/arch/x86/mm/pgprot.c
new file mode 100644
index 0000000000000..4d8e9dbcce993
--- /dev/null
+++ b/arch/x86/mm/pgprot.c
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+
+static inline pgprot_t __vm_get_page_prot(unsigned long vm_flags)
+{
+ switch (vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)) {
+ case 0:
+ return PAGE_NONE;
+ case VM_READ:
+ return PAGE_READONLY;
+ case VM_WRITE:
+ return PAGE_COPY;
+ case VM_READ | VM_WRITE:
+ return PAGE_COPY;
+ case VM_EXEC:
+ case VM_EXEC | VM_READ:
+ return PAGE_READONLY_EXEC;
+ case VM_EXEC | VM_WRITE:
+ case VM_EXEC | VM_READ | VM_WRITE:
+ return PAGE_COPY_EXEC;
+ case VM_SHARED:
+ return PAGE_NONE;
+ case VM_SHARED | VM_READ:
+ return PAGE_READONLY;
+ case VM_SHARED | VM_WRITE:
+ case VM_SHARED | VM_READ | VM_WRITE:
+ return PAGE_SHARED;
+ case VM_SHARED | VM_EXEC:
+ case VM_SHARED | VM_READ | VM_EXEC:
+ return PAGE_READONLY_EXEC;
+ case VM_SHARED | VM_WRITE | VM_EXEC:
+ case VM_SHARED | VM_READ | VM_WRITE | VM_EXEC:
+ return PAGE_SHARED_EXEC;
+ default:
+ BUILD_BUG();
+ return PAGE_NONE;
+ }
+}
+
+pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+ unsigned long val = pgprot_val(__vm_get_page_prot(vm_flags));
+
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+ /*
+ * Take the 4 protection key bits out of the vma->vm_flags value and
+ * turn them in to the bits that we can put in to a pte.
+ *
+ * Only override these if Protection Keys are available (which is only
+ * on 64-bit).
+ */
+ if (vm_flags & VM_PKEY_BIT0)
+ val |= _PAGE_PKEY_BIT0;
+ if (vm_flags & VM_PKEY_BIT1)
+ val |= _PAGE_PKEY_BIT1;
+ if (vm_flags & VM_PKEY_BIT2)
+ val |= _PAGE_PKEY_BIT2;
+ if (vm_flags & VM_PKEY_BIT3)
+ val |= _PAGE_PKEY_BIT3;
+#endif
+
+ val = __sme_set(val);
+ if (val & _PAGE_PRESENT)
+ val &= __supported_pte_mask;
+ return __pgprot(val);
+}
+EXPORT_SYMBOL(vm_get_page_prot);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f9..def17c5fb6afc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -428,12 +428,6 @@ extern unsigned int kobjsize(const void *objp);
#endif
#define VM_FLAGS_CLEAR (ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR)

-/*
- * mapping from the currently active vm_flags protection bits (the
- * low four bits) to a page protection mask..
- */
-extern pgprot_t protection_map[16];
-
/**
* enum fault_flag - Fault flag definitions.
* @FAULT_FLAG_WRITE: Fault was a write fault.
diff --git a/mm/Kconfig b/mm/Kconfig
index d16ba9249bc53..d9fa0bac189b4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -894,6 +894,9 @@ config IO_MAPPING
config SECRETMEM
def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED

+config ARCH_HAS_GET_PAGE_PROT
+ bool
+
source "mm/damon/Kconfig"

endmenu
diff --git a/mm/mmap.c b/mm/mmap.c
index 88dcc5c252255..c6031dfcedd18 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -80,6 +80,7 @@ static void unmap_region(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *prev,
unsigned long start, unsigned long end);

+#ifndef CONFIG_ARCH_HAS_GET_PAGE_PROT
/* description of effects of mapping type and prot in current implementation.
* this is due to the limited x86 page protection hardware. The expected
* behavior is in parens:
@@ -100,11 +101,6 @@ static void unmap_region(struct mm_struct *mm,
* w: (no) no
* x: (yes) yes
*/
-pgprot_t protection_map[16] __ro_after_init = {
- __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
- __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
-};
-
#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
{
@@ -112,15 +108,57 @@ static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
}
#endif

+static pgprot_t __vm_get_page_prot(unsigned long vm_flags)
+{
+ switch (vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)) {
+ case 0:
+ return __P000;
+ case VM_READ:
+ return __P001;
+ case VM_WRITE:
+ return __P010;
+ case VM_READ | VM_WRITE:
+ return __P011;
+ case VM_EXEC:
+ return __P100;
+ case VM_EXEC | VM_READ:
+ return __P101;
+ case VM_EXEC | VM_WRITE:
+ return __P110;
+ case VM_EXEC | VM_READ | VM_WRITE:
+ return __P111;
+ case VM_SHARED:
+ return __S000;
+ case VM_SHARED | VM_READ:
+ return __S001;
+ case VM_SHARED | VM_WRITE:
+ return __S010;
+ case VM_SHARED | VM_READ | VM_WRITE:
+ return __S011;
+ case VM_SHARED | VM_EXEC:
+ return __S100;
+ case VM_SHARED | VM_READ | VM_EXEC:
+ return __S101;
+ case VM_SHARED | VM_WRITE | VM_EXEC:
+ return __S110;
+ case VM_SHARED | VM_READ | VM_WRITE | VM_EXEC:
+ return __S111;
+ default:
+ BUG();
+ }
+}
+
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
- pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
- (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
- pgprot_val(arch_vm_get_page_prot(vm_flags)));
+ pgprot_t ret;
+
+ ret = __pgprot(pgprot_val(__vm_get_page_prot(vm_flags)) |
+ pgprot_val(arch_vm_get_page_prot(vm_flags)));

return arch_filter_pgprot(ret);
}
EXPORT_SYMBOL(vm_get_page_prot);
+#endif /* CONFIG_ARCH_HAS_GET_PAGE_PROT */

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
@@ -1660,10 +1698,9 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
#endif /* __ARCH_WANT_SYS_OLD_MMAP */

/*
- * Some shared mappings will want the pages marked read-only
- * to track write events. If so, we'll downgrade vm_page_prot
- * to the private version (using protection_map[] without the
- * VM_SHARED bit).
+ * Some shared mappings will want the pages marked read-only to track write
+ * events. If so, we'll downgrade vm_page_prot to the private version without
+ * the VM_SHARED bit.
*/
int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
{