On Wed, Jun 29, 2022 at 04:53:57PM -0600, Khalid Aziz wrote:
mmap is used to establish the address range for an mshare region and to
map the region into a process's address space. Add a basic mmap operation
that supports setting the address range. Also fix the code so that it does
not allocate a new mm_struct for files in msharefs that exist only to
provide information and do not define a new mshare region.
Signed-off-by: Khalid Aziz <khalid.aziz@xxxxxxxxxx>
Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
---
mm/mshare.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 41 insertions(+), 7 deletions(-)
diff --git a/mm/mshare.c b/mm/mshare.c
index d238b68b0576..088a6cab1e93 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -9,7 +9,8 @@
*
*
* Copyright (C) 2022 Oracle Corp. All rights reserved.
- * Author: Khalid Aziz <khalid.aziz@xxxxxxxxxx>
+ * Authors: Khalid Aziz <khalid.aziz@xxxxxxxxxx>
+ * Matthew Wilcox <willy@xxxxxxxxxxxxx>
*
*/
@@ -60,9 +61,36 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
return ret;
}
+static int
+msharefs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct mshare_data *info = file->private_data;
+ struct mm_struct *mm = info->mm;
+
+ /*
+ * If this mshare region has been set up once already, bail out
+ */
+ if (mm->mmap_base != 0)
+ return -EINVAL;
+
+ if ((vma->vm_start | vma->vm_end) & (PGDIR_SIZE - 1))
+ return -EINVAL;
+
+ mm->mmap_base = vma->vm_start;
+ mm->task_size = vma->vm_end - vma->vm_start;
+ if (!mm->task_size)
+ mm->task_size--;
+ info->minfo->start = mm->mmap_base;
+ info->minfo->size = mm->task_size;
So, if the second mmap() caller decides to ignore the mshare_info,
should they get -EINVAL here, since the memory mappings won't be at
the same process virtual address?
+ vma->vm_flags |= VM_SHARED_PT;
+ vma->vm_private_data = info;
+ return 0;
+}
+
static const struct file_operations msharefs_file_operations = {
.open = msharefs_open,
.read_iter = msharefs_read,
+ .mmap = msharefs_mmap,
.llseek = no_llseek,
};
@@ -119,7 +147,12 @@ msharefs_fill_mm(struct inode *inode)
goto err_free;
}
info->mm = mm;
- info->minfo = NULL;
+ info->minfo = kzalloc(sizeof(struct mshare_info), GFP_KERNEL);
+ if (info->minfo == NULL) {
+ retval = -ENOMEM;
+ goto err_free;
+ }
+
refcount_set(&info->refcnt, 1);
inode->i_private = info;
@@ -128,13 +161,14 @@ msharefs_fill_mm(struct inode *inode)
err_free:
if (mm)
mmput(mm);
+ kfree(info->minfo);
kfree(info);
return retval;
}
static struct inode
*msharefs_get_inode(struct super_block *sb, const struct inode *dir,
- umode_t mode)
+ umode_t mode, bool newmm)
{
struct inode *inode = new_inode(sb);
if (inode) {
@@ -147,7 +181,7 @@ static struct inode
case S_IFREG:
inode->i_op = &msharefs_file_inode_ops;
inode->i_fop = &msharefs_file_operations;
- if (msharefs_fill_mm(inode) != 0) {
+ if (newmm && msharefs_fill_mm(inode) != 0) {
discard_new_inode(inode);
inode = ERR_PTR(-ENOMEM);
}
@@ -177,7 +211,7 @@ msharefs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
struct inode *inode;
int err = 0;
- inode = msharefs_get_inode(dir->i_sb, dir, mode);
+ inode = msharefs_get_inode(dir->i_sb, dir, mode, true);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -267,7 +301,7 @@ prepopulate_files(struct super_block *s, struct inode *dir,
if (!dentry)
return -ENOMEM;
- inode = msharefs_get_inode(s, dir, S_IFREG | files->mode);
+ inode = msharefs_get_inode(s, dir, S_IFREG | files->mode, false);
I was wondering why the information files were getting their own
mshare_data.
To be honest, I'm not really sure what the difference is between
mshare_data and mshare_info, since those names are not especially distinct.
if (!inode) {
dput(dentry);
return -ENOMEM;
@@ -301,7 +335,7 @@ msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_d_op = &msharefs_d_ops;
sb->s_time_gran = 1;
- inode = msharefs_get_inode(sb, NULL, S_IFDIR | 0777);
+ inode = msharefs_get_inode(sb, NULL, S_IFDIR | 0777, false);
Is it wise to default to world-writable? Surely whatever userspace
software wraps an msharefs can relax permissions as needed.