[RFC PATCH 1/3] mm: drop the assumption that VM_SHARED always implies writable

From: Lorenzo Stoakes
Date: Mon Apr 03 2023 - 18:28:51 EST


There are places in the kernel where there is an implicit assumption that
VM_SHARED VMAs must either be writable or might become writable via
e.g. mprotect().

We can explicitly check for the writable, shared case while remaining
conservative - If VM_MAYWRITE is not set then, by definition, the memory
can never be written to.

Update these checks to also check for VM_MAYWRITE.

Suggested-by: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Signed-off-by: Lorenzo Stoakes <lstoakes@xxxxxxxxx>
---
include/linux/fs.h | 4 ++--
include/linux/mm.h | 11 +++++++++++
kernel/fork.c | 2 +-
mm/filemap.c | 2 +-
mm/madvise.c | 2 +-
mm/mmap.c | 12 ++++++------
6 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c85916e9f7db..373e1edd719c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -410,7 +410,7 @@ extern const struct address_space_operations empty_aops;
* It is also used to block modification of page cache contents through
* memory mappings.
* @gfp_mask: Memory allocation flags to use for allocating pages.
- * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings.
* @nr_thps: Number of THPs in the pagecache (non-shmem only).
* @i_mmap: Tree of private and shared mappings.
* @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
@@ -513,7 +513,7 @@ static inline int mapping_mapped(struct address_space *mapping)

/*
* Might pages of this file have been modified in userspace?
- * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
+ * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap
* marks vma as VM_SHARED if it is shared, and the file was opened for
* writing i.e. vma may be mprotected writable even if now readonly.
*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 898ece0a3802..8e64041b1703 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -862,6 +862,17 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
return vma->vm_flags & VM_ACCESS_FLAGS;
}

+static inline bool is_shared_maywrite(vm_flags_t vm_flags)
+{
+ return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
+ (VM_SHARED | VM_MAYWRITE);
+}
+
+static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma)
+{
+ return is_shared_maywrite(vma->vm_flags);
+}
+
static inline
struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
{
diff --git a/kernel/fork.c b/kernel/fork.c
index 2066a57786a8..58f257d60fee 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -733,7 +733,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,

get_file(file);
i_mmap_lock_write(mapping);
- if (tmp->vm_flags & VM_SHARED)
+ if (vma_is_shared_maywrite(tmp))
mapping_allow_writable(mapping);
flush_dcache_mmap_lock(mapping);
/* insert tmp into the share list, just after mpnt */
diff --git a/mm/filemap.c b/mm/filemap.c
index a34abfe8c654..4d896515032c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3607,7 +3607,7 @@ int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
*/
int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
{
- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+ if (vma_is_shared_maywrite(vma))
return -EINVAL;
return generic_file_mmap(file, vma);
}
diff --git a/mm/madvise.c b/mm/madvise.c
index 340125d08c03..606c395c4ddd 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -981,7 +981,7 @@ static long madvise_remove(struct vm_area_struct *vma,
return -EINVAL;
}

- if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
+ if (!vma_is_shared_maywrite(vma))
return -EACCES;

offset = (loff_t)(start - vma->vm_start)
diff --git a/mm/mmap.c b/mm/mmap.c
index 51cd747884e3..c96dcce90772 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -106,7 +106,7 @@ void vma_set_page_prot(struct vm_area_struct *vma)
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
struct file *file, struct address_space *mapping)
{
- if (vma->vm_flags & VM_SHARED)
+ if (vma_is_shared_maywrite(vma))
mapping_unmap_writable(mapping);

flush_dcache_mmap_lock(mapping);
@@ -427,7 +427,7 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm,
static void __vma_link_file(struct vm_area_struct *vma,
struct address_space *mapping)
{
- if (vma->vm_flags & VM_SHARED)
+ if (vma_is_shared_maywrite(vma))
mapping_allow_writable(mapping);

flush_dcache_mmap_lock(mapping);
@@ -2596,7 +2596,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vma->vm_pgoff = pgoff;

if (file) {
- if (vm_flags & VM_SHARED) {
+ if (is_shared_maywrite(vm_flags)) {
error = mapping_map_writable(file->f_mapping);
if (error)
goto free_vma;
@@ -2671,7 +2671,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vma_iter_store(&vmi, vma);
mm->map_count++;
if (vma->vm_file) {
- if (vma->vm_flags & VM_SHARED)
+ if (vma_is_shared_maywrite(vma))
mapping_allow_writable(vma->vm_file->f_mapping);

flush_dcache_mmap_lock(vma->vm_file->f_mapping);
@@ -2688,7 +2688,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,

/* Once vma denies write, undo our temporary denial count */
unmap_writable:
- if (file && vm_flags & VM_SHARED)
+ if (file && is_shared_maywrite(vm_flags))
mapping_unmap_writable(file->f_mapping);
file = vma->vm_file;
expanded:
@@ -2734,7 +2734,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
unmap_region(mm, &mm->mm_mt, vma, prev, next, vma->vm_start,
vma->vm_end, true);
}
- if (file && (vm_flags & VM_SHARED))
+ if (file && is_shared_maywrite(vm_flags))
mapping_unmap_writable(file->f_mapping);
free_vma:
vm_area_free(vma);
--
2.40.0