[PATCH 16/16] memfd: memfd_create(name, MFD_MEM_LOCK) for memlocked shmem

From: Hugh Dickins
Date: Fri Jul 30 2021 - 04:13:07 EST


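Now that the size of a memlocked file can be changed, memfd_create() can
accept an MFD_MEM_LOCK flag to request memlocking, even though the initial
size is of course 0. MFD_MEM_LOCK is rejected with -EINVAL when combined
with MFD_HUGETLB, and __shmem_file_setup() returns -EPERM if
user_shm_lock() fails.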

Signed-off-by: Hugh Dickins <hughd@xxxxxxxxxx>
---
include/uapi/linux/memfd.h | 1 +
mm/memfd.c | 7 +++++--
mm/shmem.c | 13 ++++++++++++-
3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h
index 8358a69e78cc..9113b5aa1763 100644
--- a/include/uapi/linux/memfd.h
+++ b/include/uapi/linux/memfd.h
@@ -9,6 +9,7 @@
#define MFD_ALLOW_SEALING 0x0002U
#define MFD_HUGETLB 0x0004U /* Use hugetlbfs */
#define MFD_HUGEPAGE 0x0008U /* Use huge tmpfs */
+#define MFD_MEM_LOCK 0x0010U /* Memlock tmpfs */

/*
* Huge page size encoding when MFD_HUGETLB is specified, and a huge page
diff --git a/mm/memfd.c b/mm/memfd.c
index 0d1a504d2fc9..e39f9eed55d2 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -248,7 +248,8 @@ long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
#define MFD_ALL_FLAGS (MFD_CLOEXEC | \
MFD_ALLOW_SEALING | \
MFD_HUGETLB | \
- MFD_HUGEPAGE)
+ MFD_HUGEPAGE | \
+ MFD_MEM_LOCK)

SYSCALL_DEFINE2(memfd_create,
const char __user *, uname,
@@ -262,7 +263,7 @@ SYSCALL_DEFINE2(memfd_create,

if (flags & MFD_HUGETLB) {
/* Disallow huge tmpfs when choosing hugetlbfs */
- if (flags & MFD_HUGEPAGE)
+ if (flags & (MFD_HUGEPAGE | MFD_MEM_LOCK))
return -EINVAL;
/* Allow huge page size encoding in flags. */
if (flags & ~(unsigned int)(MFD_ALL_FLAGS |
@@ -314,6 +315,8 @@ SYSCALL_DEFINE2(memfd_create,

if (flags & MFD_HUGEPAGE)
vm_flags |= VM_HUGEPAGE;
+ if (flags & MFD_MEM_LOCK)
+ vm_flags |= VM_LOCKED;
file = shmem_file_setup(name, 0, vm_flags);
}

diff --git a/mm/shmem.c b/mm/shmem.c
index fa4a264453bf..a0a83e59ae07 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2395,7 +2395,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
spin_lock_init(&info->lock);
atomic_set(&info->stop_eviction, 0);
info->seals = F_SEAL_SEAL;
- info->flags = flags & VM_NORESERVE;
+ info->flags = flags & (VM_NORESERVE | VM_LOCKED);
if ((flags & VM_HUGEPAGE) &&
transparent_hugepage_allowed(sbinfo) &&
!test_bit(MMF_DISABLE_THP, &current->mm->flags))
@@ -4254,6 +4254,17 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l
inode->i_size = size;
clear_nlink(inode); /* It is unlinked */
res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
+ if (!IS_ERR(res) && (flags & VM_LOCKED)) {
+ struct ucounts *ucounts = current_ucounts();
+ /*
+ * Only memfd_create() may pass VM_LOCKED, and it passes
+ * size 0; but avoid that assumption in case it changes.
+ */
+ if (user_shm_lock(size, ucounts, true))
+ SHMEM_I(inode)->mlock_ucounts = ucounts;
+ else
+ res = ERR_PTR(-EPERM);
+ }
if (!IS_ERR(res))
res = alloc_file_pseudo(inode, mnt, name, O_RDWR,
&shmem_file_operations);
--
2.26.2