[PATCH RFC 3/7] kernel/fork: always deny write access to current MM exe_file

From: David Hildenbrand
Date: Fri Apr 23 2021 - 09:18:15 EST


We want to remove VM_DENYWRITE, which is currently only used when mapping
the executable during exec. During exec, we already deny_write_access() the
executable; however, after exec completes, the VMAs mapped with
VM_DENYWRITE effectively keep write access denied via
deny_write_access().

Let's deny write access when setting the MM exe_file. With this change, we
can remove VM_DENYWRITE for mapping executables.

This represents a minor user-space-visible change:
sys_prctl(PR_SET_MM_EXE_FILE) can now fail with -EACCES if the file is
already opened writable. Also, after sys_prctl(PR_SET_MM_EXE_FILE), the
file cannot be opened writable. Note that we can already fail with -EACCES
if the file doesn't have execute permissions.

Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
kernel/fork.c | 39 ++++++++++++++++++++++++++++++++++-----
1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 199463625adc..0681f2973667 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -472,6 +472,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
{
struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
struct rb_node **rb_link, *rb_parent;
+ struct file *exe_file;
int retval;
unsigned long charge;
LIST_HEAD(uf);
@@ -489,7 +490,10 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING);

/* No ordering required: file already has been exposed. */
- RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+ exe_file = get_mm_exe_file(oldmm);
+ RCU_INIT_POINTER(mm->exe_file, exe_file);
+ if (exe_file)
+ deny_write_access(exe_file);

mm->total_vm = oldmm->total_vm;
mm->data_vm = oldmm->data_vm;
@@ -634,8 +638,13 @@ static inline void mm_free_pgd(struct mm_struct *mm)
#else
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
+ struct file *exe_file;
+
mmap_write_lock(oldmm);
- RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
+ exe_file = get_mm_exe_file(oldmm);
+ RCU_INIT_POINTER(mm->exe_file, exe_file);
+ if (exe_file)
+ deny_write_access(exe_file);
mmap_write_unlock(oldmm);
return 0;
}
@@ -1152,11 +1161,19 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
*/
old_exe_file = rcu_dereference_raw(mm->exe_file);

- if (new_exe_file)
+ if (new_exe_file) {
get_file(new_exe_file);
+ /*
+ * exec code is required to deny_write_access() successfully,
+ * so this cannot fail
+ */
+ deny_write_access(new_exe_file);
+ }
rcu_assign_pointer(mm->exe_file, new_exe_file);
- if (old_exe_file)
+ if (old_exe_file) {
+ allow_write_access(old_exe_file);
fput(old_exe_file);
+ }
}

int atomic_set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
@@ -1183,10 +1200,22 @@ int atomic_set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
}

/* set the new file, lockless */
+ ret = deny_write_access(new_exe_file);
+ if (ret)
+ return -EACCES;
get_file(new_exe_file);
+
old_exe_file = xchg(&mm->exe_file, new_exe_file);
- if (old_exe_file)
+ if (old_exe_file) {
+ /*
+ * Don't race with dup_mmap() getting the file and disallowing
+ * write access while someone might open the file writable.
+ */
+ mmap_read_lock(mm);
+ allow_write_access(old_exe_file);
fput(old_exe_file);
+ mmap_read_unlock(mm);
+ }
return 0;
}

--
2.30.2