[PATCH 1/2] mm/hugetlb: Fix F_SEAL_FUTURE_WRITE

From: Peter Xu
Date: Sat May 01 2021 - 10:42:37 EST


F_SEAL_FUTURE_WRITE is missing for hugetlb starting from the first day.
There is a test program for that and it fails constantly.

$ ./memfd_test hugetlbfs
memfd-hugetlb: CREATE
memfd-hugetlb: BASIC
memfd-hugetlb: SEAL-WRITE
memfd-hugetlb: SEAL-FUTURE-WRITE
mmap() didn't fail as expected
Aborted (core dumped)

I think it's probably because no one is really running the hugetlbfs test.

Fix it by checking FUTURE_WRITE also in hugetlbfs_file_mmap() as what we do in
shmem_mmap(). Generalize a helper for that.

Reported-by: Hugh Dickins <hughd@xxxxxxxxxx>
Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
fs/hugetlbfs/inode.c | 5 +++++
include/linux/mm.h | 32 ++++++++++++++++++++++++++++++++
mm/shmem.c | 22 ++++------------------
3 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a2a42335e8fd2..39922c0f2fc8c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -131,10 +131,15 @@ static void huge_pagevec_release(struct pagevec *pvec)
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
+ struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);

+ ret = seal_check_future_write(info->seals, vma);
+ if (ret)
+ return ret;
+
/*
* vma address alignment (but not the pgoff alignment) has
* already been checked by prepare_hugepage_range. If you add
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 84fb1697b20ff..c3fd7d504a60e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3200,5 +3200,37 @@ extern int sysctl_nr_trim_pages;

void mem_dump_obj(void *object);

+/**
+ * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it
+ * @seals: the seals to check
+ * @vma: the vma to operate on
+ *
+ * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on
+ * the vma flags. Return 0 if check pass, or <0 for errors.
+ */
+static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
+{
+ if (seals & F_SEAL_FUTURE_WRITE) {
+ /*
+ * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
+ * "future write" seal active.
+ */
+ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+ return -EPERM;
+
+ /*
+ * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
+ * MAP_SHARED and read-only, take care to not allow mprotect to
+ * revert protections on such mappings. Do this only for shared
+ * mappings. For private mappings, don't need to mask
+ * VM_MAYWRITE as we still want them to be COW-writable.
+ */
+ if (vma->vm_flags & VM_SHARED)
+ vma->vm_flags &= ~(VM_MAYWRITE);
+ }
+
+ return 0;
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/mm/shmem.c b/mm/shmem.c
index 26c76b13ad233..e86a230735b60 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2258,25 +2258,11 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
{
struct shmem_inode_info *info = SHMEM_I(file_inode(file));
+ int ret;

- if (info->seals & F_SEAL_FUTURE_WRITE) {
- /*
- * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
- * "future write" seal active.
- */
- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
- return -EPERM;
-
- /*
- * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
- * MAP_SHARED and read-only, take care to not allow mprotect to
- * revert protections on such mappings. Do this only for shared
- * mappings. For private mappings, don't need to mask
- * VM_MAYWRITE as we still want them to be COW-writable.
- */
- if (vma->vm_flags & VM_SHARED)
- vma->vm_flags &= ~(VM_MAYWRITE);
- }
+ ret = seal_check_future_write(info->seals, vma);
+ if (ret)
+ return ret;

/* arm64 - allow memory tagging on RAM-based files */
vma->vm_flags |= VM_MTE_ALLOWED;
--
2.31.1