[PATCH RFC 2/4] mm, personality: Implement memory-deny-write-execute as a personality flag
From: Catalin Marinas
Date: Wed Apr 13 2022 - 09:50:48 EST
The aim of such a policy is to prevent a user task from inadvertently
creating an executable mapping that is or was writeable (and
subsequently made read-only).
An example of mmap() returning -EACCES if the policy is enabled:
mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, 0, 0);
Similarly, mprotect() would return -EACCES below:
addr = mmap(0, size, PROT_READ | PROT_EXEC, flags, 0, 0);
mprotect(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC);
With the past vma writeable permission tracking, mprotect() below would
also fail with -EACCES:
addr = mmap(0, size, PROT_READ | PROT_WRITE, flags, 0, 0);
mprotect(addr, size, PROT_READ | PROT_EXEC);
While the above could be achieved by checking PROT_WRITE & PROT_EXEC on
mmap/mprotect and denying mprotect(PROT_EXEC) altogether (current
systemd MDWE approach via SECCOMP BPF filters), we want the following
scenario to succeed:
addr = mmap(0, size, PROT_READ | PROT_EXEC, flags, 0, 0);
mprotect(addr, size, PROT_READ | PROT_EXEC | PROT_BTI);
where PROT_BTI enables Branch Target Identification on arm64.
The choice for a DENY_WRITE_EXEC personality flag, inherited on fork()
and execve(), was made by analogy to READ_IMPLIES_EXEC.
Note that it is sufficient to check for VM_WAS_WRITE in
map_deny_write_exec() as this flag is always set on VM_WRITE mappings.
Signed-off-by: Catalin Marinas <catalin.marinas@xxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---
include/linux/mman.h | 10 ++++++++++
include/uapi/linux/personality.h | 1 +
mm/mmap.c | 3 +++
mm/mprotect.c | 5 +++++
4 files changed, 19 insertions(+)
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 2d841ddae2aa..17e91a1bdfb3 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -166,4 +166,14 @@ calc_vm_flag_bits(unsigned long flags)
}
unsigned long vm_commit_limit(void);
+
+static inline bool map_deny_write_exec(unsigned long vm_flags)
+{
+ if (IS_ENABLED(CONFIG_ARCH_ENABLE_DENY_WRITE_EXEC) &&
+ (current->personality & DENY_WRITE_EXEC) &&
+ (vm_flags & VM_EXEC) && (vm_flags & VM_WAS_WRITE))
+ return true;
+ return false;
+}
+
#endif /* _LINUX_MMAN_H */
diff --git a/include/uapi/linux/personality.h b/include/uapi/linux/personality.h
index 49796b7756af..c8d924be3dcd 100644
--- a/include/uapi/linux/personality.h
+++ b/include/uapi/linux/personality.h
@@ -22,6 +22,7 @@ enum {
WHOLE_SECONDS = 0x2000000,
STICKY_TIMEOUTS = 0x4000000,
ADDR_LIMIT_3GB = 0x8000000,
+ DENY_WRITE_EXEC = 0x10000000,
};
/*
diff --git a/mm/mmap.c b/mm/mmap.c
index 3aa839f81e63..8e894270a80e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1579,6 +1579,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
vm_flags |= VM_NORESERVE;
}
+ if (map_deny_write_exec(vm_flags))
+ return -EACCES;
+
addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
if (!IS_ERR_VALUE(addr) &&
((vm_flags & VM_LOCKED) ||
diff --git a/mm/mprotect.c b/mm/mprotect.c
index b69ce7a7b2b7..ff0d13a4c1ed 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -627,6 +627,11 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
goto out;
}
+ if (map_deny_write_exec(newflags)) {
+ error = -EACCES;
+ goto out;
+ }
+
/* Allow architectures to sanity-check the new flags */
if (!arch_validate_flags(newflags)) {
error = -EINVAL;