[PATCH 15/16] tmpfs: permit changing size of memlocked file

From: Hugh Dickins
Date: Fri Jul 30 2021 - 04:10:05 EST


We have users who change the size of their memlocked file by the sequence
F_MEM_UNLOCK, ftruncate, F_MEM_LOCK. That risks swapout in between, and is
distasteful, particularly if the file is very large (when
shmem_unlock_mapping() has a lot of work to move pages off the Unevictable
list, only for them to be moved back there later on).

Modify shmem_setattr() to grow or shrink, and shmem_fallocate() to grow,
the locked extent. But forbid (EPERM) both if current_ucounts() differs
from the locker's mlock_ucounts (without even a CAP_IPC_LOCK override).
Such size changes by others could be permitted (the caller already has
unsealed write access), but it's probably less confusing to restrict size
change to the locker.

But leave shmem_write_begin() as is, preventing the memlocked file from
being extended implicitly by writes beyond EOF: I think that it's best to
demand an explicit size change, by truncate or fallocate, when memlocked.

(But notice in testing "echo x >memlockedfile" how the O_TRUNC succeeds
but the write fails: would F_MEM_UNLOCK on truncation to 0 be better?)

Signed-off-by: Hugh Dickins <hughd@xxxxxxxxxx>
---
mm/shmem.c | 48 ++++++++++++++++++++++++++++++++++++++----------
1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 1ddb910e976c..fa4a264453bf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1123,15 +1123,30 @@ static int shmem_setattr(struct user_namespace *mnt_userns,

/* protected by i_mutex */
if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
- (newsize > oldsize && (info->seals & F_SEAL_GROW)) ||
- (newsize != oldsize && info->mlock_ucounts))
+ (newsize > oldsize && (info->seals & F_SEAL_GROW)))
return -EPERM;

if (newsize != oldsize) {
- error = shmem_reacct_size(SHMEM_I(inode)->flags,
- oldsize, newsize);
+ struct ucounts *ucounts = info->mlock_ucounts;
+
+ if (ucounts && ucounts != current_ucounts())
+ return -EPERM;
+ error = shmem_reacct_size(info->flags,
+ oldsize, newsize);
if (error)
return error;
+ if (ucounts) {
+ loff_t mlock = round_up(newsize, PAGE_SIZE) -
+ round_up(oldsize, PAGE_SIZE);
+ if (mlock < 0) {
+ user_shm_unlock(-mlock, ucounts, false);
+ } else if (mlock > 0 &&
+ !user_shm_lock(mlock, ucounts, false)) {
+ shmem_reacct_size(info->flags,
+ newsize, oldsize);
+ return -EPERM;
+ }
+ }
i_size_write(inode, newsize);
inode->i_ctime = inode->i_mtime = current_time(inode);
}
@@ -2784,6 +2799,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_falloc shmem_falloc;
pgoff_t start, index, end, undo_fallocend;
+ loff_t mlock = 0;
int error;

if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2830,13 +2846,23 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
if (error)
goto out;

- if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
- error = -EPERM;
- goto out;
- }
- if (info->mlock_ucounts && offset + len > inode->i_size) {
+ if (offset + len > inode->i_size) {
error = -EPERM;
- goto out;
+ if (info->seals & F_SEAL_GROW)
+ goto out;
+ if (info->mlock_ucounts) {
+ if (info->mlock_ucounts != current_ucounts() ||
+ (mode & FALLOC_FL_KEEP_SIZE))
+ goto out;
+ mlock = round_up(offset + len, PAGE_SIZE) -
+ round_up(inode->i_size, PAGE_SIZE);
+ if (mlock > 0 &&
+ !user_shm_lock(mlock, info->mlock_ucounts, false)) {
+ mlock = 0;
+ goto out;
+ }
+ }
+ error = 0;
}

start = offset >> PAGE_SHIFT;
@@ -2932,6 +2958,8 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
inode->i_private = NULL;
spin_unlock(&inode->i_lock);
out:
+ if (error && mlock > 0)
+ user_shm_unlock(mlock, info->mlock_ucounts, false);
inode_unlock(inode);
return error;
}
--
2.26.2