[PATCH v1 5/6] PKEY: Apply PKEY_ENFORCE_API to munmap

From: jeffxu
Date: Thu May 18 2023 - 21:19:48 EST


From: Jeff Xu <jeffxu@xxxxxxxxxx>

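This patch enables PKEY_ENFORCE_API for the munmap
syscall.

do_vmi_munmap() gains an enum caller_origin argument. When the
request comes from the munmap syscall (ON_BEHALF_OF_USERSPACE),
arch_check_pkey_enforce_api() is called before anything is unmapped,
and munmap fails with -EACCES if that check returns an error.

All other callers (do_munmap, mmap_region, vm_munmap, vm_brk_flags,
and the mremap paths) pass ON_BEHALF_OF_KERNEL and skip the check.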

Signed-off-by: Jeff Xu <jeffxu@xxxxxxxxxx>
---
include/linux/mm.h | 8 +++++++-
mm/mmap.c | 31 +++++++++++++++++++++++--------
mm/mremap.c | 6 ++++--
3 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 27ce77080c79..795cdeee2ea7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -429,6 +429,12 @@ extern unsigned int kobjsize(const void *objp);
#endif
#define VM_FLAGS_CLEAR (ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR)

+/* Whether the current call stack originated from user space or the kernel */
+enum caller_origin {
+ ON_BEHALF_OF_KERNEL = 0,
+ ON_BEHALF_OF_USERSPACE,
+};
+
/*
* mapping from the currently active vm_flags protection bits (the
* low four bits) to a page protection mask..
@@ -3136,7 +3142,7 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long pgoff, unsigned long *populate, struct list_head *uf);
extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
unsigned long start, size_t len, struct list_head *uf,
- bool downgrade);
+ bool downgrade, enum caller_origin called);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
diff --git a/mm/mmap.c b/mm/mmap.c
index 13678edaa22c..3de95a6a4397 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2498,6 +2498,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
* @uf: The userfaultfd list_head
* @downgrade: set to true if the user wants to attempt to write_downgrade the
* mmap_lock
+ * @called: whether the request originated from user space or the kernel
*
* This function takes a @mas that is either pointing to the previous VMA or set
* to MA_START and sets it up to remove the mapping(s). The @len will be
@@ -2507,7 +2508,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
*/
int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
unsigned long start, size_t len, struct list_head *uf,
- bool downgrade)
+ bool downgrade, enum caller_origin called)
{
unsigned long end;
struct vm_area_struct *vma;
@@ -2519,6 +2520,15 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
if (end == start)
return -EINVAL;

+ /*
+ * When called by syscall from userspace, check if the calling
+ * thread has the PKEY permission to modify the memory mapping.
+ */
+ if (called == ON_BEHALF_OF_USERSPACE &&
+ arch_check_pkey_enforce_api(mm, start, end) < 0) {
+ return -EACCES;
+ }
+
/* arch_unmap() might do unmaps itself. */
arch_unmap(mm, start, end);

@@ -2541,7 +2551,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
{
VMA_ITERATOR(vmi, mm, start);

- return do_vmi_munmap(&vmi, mm, start, len, uf, false);
+ return do_vmi_munmap(&vmi, mm, start, len, uf, false, ON_BEHALF_OF_KERNEL);
}

unsigned long mmap_region(struct file *file, unsigned long addr,
@@ -2575,7 +2585,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
}

/* Unmap any existing mapping in the area */
- if (do_vmi_munmap(&vmi, mm, addr, len, uf, false))
+ if (do_vmi_munmap(&vmi, mm, addr, len, uf, false,
+ ON_BEHALF_OF_KERNEL))
return -ENOMEM;

/*
@@ -2792,7 +2803,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
return error;
}

-static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
+/*
+ * @called: whether the request originated from user space or the kernel
+ */
+static int __vm_munmap(unsigned long start, size_t len, bool downgrade,
+ enum caller_origin called)
{
int ret;
struct mm_struct *mm = current->mm;
@@ -2802,7 +2817,7 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
if (mmap_write_lock_killable(mm))
return -EINTR;

- ret = do_vmi_munmap(&vmi, mm, start, len, &uf, downgrade);
+ ret = do_vmi_munmap(&vmi, mm, start, len, &uf, downgrade, called);
/*
* Returning 1 indicates mmap_lock is downgraded.
* But 1 is not legal return value of vm_munmap() and munmap(), reset
@@ -2820,14 +2835,14 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)

int vm_munmap(unsigned long start, size_t len)
{
- return __vm_munmap(start, len, false);
+ return __vm_munmap(start, len, false, ON_BEHALF_OF_KERNEL);
}
EXPORT_SYMBOL(vm_munmap);

SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
addr = untagged_addr(addr);
- return __vm_munmap(addr, len, true);
+ return __vm_munmap(addr, len, true, ON_BEHALF_OF_USERSPACE);
}


@@ -3055,7 +3070,7 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
if (ret)
goto limits_failed;

- ret = do_vmi_munmap(&vmi, mm, addr, len, &uf, 0);
+ ret = do_vmi_munmap(&vmi, mm, addr, len, &uf, 0, ON_BEHALF_OF_KERNEL);
if (ret)
goto munmap_failed;

diff --git a/mm/mremap.c b/mm/mremap.c
index b11ce6c92099..a3f576effbb1 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -703,7 +703,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
}

vma_iter_init(&vmi, mm, old_addr);
- if (do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false) < 0) {
+ if (do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false,
+ ON_BEHALF_OF_KERNEL) < 0) {
/* OOM: unable to split vma, just get accounts right */
if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP))
vm_acct_memory(old_len >> PAGE_SHIFT);
@@ -993,7 +994,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
VMA_ITERATOR(vmi, mm, addr + new_len);

retval = do_vmi_munmap(&vmi, mm, addr + new_len,
- old_len - new_len, &uf_unmap, true);
+ old_len - new_len, &uf_unmap, true,
+ ON_BEHALF_OF_KERNEL);
/* Returning 1 indicates mmap_lock is downgraded to read. */
if (retval == 1) {
downgraded = true;
--
2.40.1.606.ga4b1b128d6-goog