Re: [PATCH v2 4/9] mm/mshare: Add a read operation for msharefs files

From: Khalid Aziz
Date: Thu Jun 30 2022 - 18:28:51 EST


On 6/30/22 15:27, Darrick J. Wong wrote:
On Wed, Jun 29, 2022 at 04:53:55PM -0600, Khalid Aziz wrote:
When a new file is created under msharefs, allocate a new mm_struct
that will hold the VMAs for mshare region. Also allocate structure
to defines the mshare region and add a read operation to the file
that returns this information about the mshare region. Currently
this information is returned as a struct:

struct mshare_info {
unsigned long start;
unsigned long size;
};

This gives the start address for mshare region and its size.

Signed-off-by: Khalid Aziz <khalid.aziz@xxxxxxxxxx>
---
include/uapi/linux/mman.h | 5 +++
mm/mshare.c | 64 ++++++++++++++++++++++++++++++++++++++-
2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index f55bc680b5b0..56fe446e24b1 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -41,4 +41,9 @@
#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
+struct mshare_info {
+ unsigned long start;
+ unsigned long size;

You might want to make these explicitly u64, since this is userspace
ABI and you never know when someone will want to do something crazy like
run 32-bit programs with mshare files.

Also you might want to add some padding fields for flags, future
expansion, etc.

That sounds like a good idea. I will queue it up for next version of patch series.


+};
+
#endif /* _UAPI_LINUX_MMAN_H */
diff --git a/mm/mshare.c b/mm/mshare.c
index 2d5924d39221..d238b68b0576 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -22,8 +22,14 @@
#include <uapi/linux/magic.h>
#include <uapi/linux/limits.h>
#include <uapi/linux/mman.h>
+#include <linux/sched/mm.h>
static struct super_block *msharefs_sb;
+struct mshare_data {
+ struct mm_struct *mm;
+ refcount_t refcnt;
+ struct mshare_info *minfo;
+};
static const struct inode_operations msharefs_dir_inode_ops;
static const struct inode_operations msharefs_file_inode_ops;
@@ -34,8 +40,29 @@ msharefs_open(struct inode *inode, struct file *file)
return simple_open(inode, file);
}
+static ssize_t
+msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
+{
+ struct mshare_data *info = iocb->ki_filp->private_data;
+ size_t ret;
+ struct mshare_info m_info;
+
+ if (info->minfo != NULL) {
+ m_info.start = info->minfo->start;
+ m_info.size = info->minfo->size;
+ } else {
+ m_info.start = 0;
+ m_info.size = 0;

Hmmm, read()ing out the shared mapping information. Heh.

When does this case happen? Is it before anybody mmaps this file into
an address space?


It can happen before or after the first mmap which will establish the start address and size. Hence I have to account for both cases.

+ }
+ ret = copy_to_iter(&m_info, sizeof(m_info), iov);
+ if (!ret)
+ return -EFAULT;
+ return ret;
+}
+
static const struct file_operations msharefs_file_operations = {
.open = msharefs_open,
+ .read_iter = msharefs_read,
.llseek = no_llseek,
};
@@ -73,12 +100,43 @@ static struct dentry
return ERR_PTR(-ENOMEM);
}
+static int
+msharefs_fill_mm(struct inode *inode)
+{
+ struct mm_struct *mm;
+ struct mshare_data *info = NULL;
+ int retval = 0;
+
+ mm = mm_alloc();
+ if (!mm) {
+ retval = -ENOMEM;
+ goto err_free;
+ }
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ retval = -ENOMEM;
+ goto err_free;
+ }
+ info->mm = mm;
+ info->minfo = NULL;
+ refcount_set(&info->refcnt, 1);
+ inode->i_private = info;
+
+ return 0;
+
+err_free:
+ if (mm)
+ mmput(mm);
+ kfree(info);
+ return retval;
+}
+
static struct inode
*msharefs_get_inode(struct super_block *sb, const struct inode *dir,
umode_t mode)
{
struct inode *inode = new_inode(sb);
-
if (inode) {
inode->i_ino = get_next_ino();
inode_init_owner(&init_user_ns, inode, dir, mode);
@@ -89,6 +147,10 @@ static struct inode
case S_IFREG:
inode->i_op = &msharefs_file_inode_ops;
inode->i_fop = &msharefs_file_operations;
+ if (msharefs_fill_mm(inode) != 0) {
+ discard_new_inode(inode);
+ inode = ERR_PTR(-ENOMEM);

Is it intentional to clobber the msharefs_fill_mm return value and
replace it with ENOMEM?

ENOMEM sounded like the right value to return from msharefs_get_inode() in case of failure. On the other hand, there isn't much of a reason to not just return the return value from msharefs_fill_mm(). I can change that.

Thanks for the review.

--
Khalid