[PATCH v2 1/9] mm: Add msharefs filesystem

From: Khalid Aziz
Date: Wed Jun 29 2022 - 18:56:57 EST


Add a ram-based filesystem that contains page table sharing
information and files that enable processes to share page tables.
This patch adds the basic filesystem that can be mounted.

Signed-off-by: Khalid Aziz <khalid.aziz@xxxxxxxxxx>
---
Documentation/filesystems/msharefs.rst | 19 +++++
include/uapi/linux/magic.h | 1 +
mm/Makefile | 2 +-
mm/mshare.c | 103 +++++++++++++++++++++++++
4 files changed, 124 insertions(+), 1 deletion(-)
create mode 100644 Documentation/filesystems/msharefs.rst
create mode 100644 mm/mshare.c

diff --git a/Documentation/filesystems/msharefs.rst b/Documentation/filesystems/msharefs.rst
new file mode 100644
index 000000000000..fd161f67045d
--- /dev/null
+++ b/Documentation/filesystems/msharefs.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================================
+msharefs - a filesystem to support shared page tables
+=====================================================
+
+msharefs is a ram-based filesystem that allows multiple processes to
+share page table entries for shared pages.
+
+msharefs is typically mounted like this::
+
+ mount -t msharefs none /sys/fs/mshare
+
+When a process calls the mshare syscall with a name for the shared
+address range, a file with that name is created under msharefs. This
+file can be opened by another process, if permissions allow, to query
+the addresses shared under this range. These files are removed by the
+mshare_unlink syscall and cannot be deleted directly. Hence these
+files are created as immutable files.
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index f724129c0425..2a57a6ec6f3e 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -105,5 +105,6 @@
#define Z3FOLD_MAGIC 0x33
#define PPC_CMM_MAGIC 0xc7571590
#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */
+#define MSHARE_MAGIC 0x4d534852 /* "MSHR" */

#endif /* __LINUX_MAGIC_H__ */
diff --git a/mm/Makefile b/mm/Makefile
index 6f9ffa968a1a..51a2ab9080d9 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -37,7 +37,7 @@ CFLAGS_init-mm.o += $(call cc-disable-warning, override-init)
CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides)

mmu-y := nommu.o
-mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \
+mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o mshare.o \
mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
msync.o page_vma_mapped.o pagewalk.o \
pgtable-generic.o rmap.o vmalloc.o
diff --git a/mm/mshare.c b/mm/mshare.c
new file mode 100644
index 000000000000..c8fab3869bab
--- /dev/null
+++ b/mm/mshare.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Enable cooperating processes to share page tables between
+ * them to reduce the extra memory consumed by multiple copies
+ * of page tables.
+ *
+ * This code adds an in-memory filesystem - msharefs.
+ * msharefs is used to manage page table sharing
+ *
+ *
+ * Copyright (C) 2022 Oracle Corp. All rights reserved.
+ * Author: Khalid Aziz <khalid.aziz@xxxxxxxxxx>
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/pseudo_fs.h>
+#include <linux/fileattr.h>
+#include <uapi/linux/magic.h>
+#include <uapi/linux/limits.h>
+
+static struct super_block *msharefs_sb; /* set by msharefs_fill_super() on success; NOTE(review): nothing in this file clears it again - confirm lifetime vs. kill_sb */
+
+static const struct file_operations msharefs_file_operations = { /* only open and llseek are provided; not referenced elsewhere in this patch */
+ .open = simple_open,
+ .llseek = no_llseek, /* NOTE(review): presumably makes the files non-seekable - confirm */
+};
+
+static int
+msharefs_d_hash(const struct dentry *dentry, struct qstr *qstr)
+{
+	unsigned long h = init_name_hash(dentry); /* seed from the dentry */
+	unsigned int i;
+
+	/* Fold every byte of the name into the hash and store it in qstr. */
+	for (i = 0; i < qstr->len; i++)
+		h = partial_name_hash(qstr->name[i], h);
+	qstr->hash = end_name_hash(h);
+	return 0;
+}
+
+static const struct dentry_operations msharefs_d_ops = { /* installed as sb->s_d_op by msharefs_fill_super() */
+ .d_hash = msharefs_d_hash,
+};
+
+static int
+msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	static const struct tree_descr no_files = {""};
+	int rc;
+
+	/* Set the dentry ops first, then let simple_fill_super() set up sb. */
+	sb->s_d_op = &msharefs_d_ops;
+	rc = simple_fill_super(sb, MSHARE_MAGIC, &no_files);
+	if (!rc)
+		msharefs_sb = sb; /* record the superblock only on success */
+
+	return rc;
+}
+
+static int
+msharefs_get_tree(struct fs_context *fc) /* .get_tree hook of msharefs_context_ops */
+{
+ return get_tree_single(fc, msharefs_fill_super); /* msharefs_fill_super() is passed as the fill callback */
+}
+
+static const struct fs_context_operations msharefs_context_ops = { /* assigned to fc->ops by mshare_init_fs_context() */
+ .get_tree = msharefs_get_tree,
+};
+
+static int
+mshare_init_fs_context(struct fs_context *fc) /* .init_fs_context hook of mshare_fs */
+{
+ fc->ops = &msharefs_context_ops; /* routes get_tree to msharefs_get_tree() */
+ return 0; /* no failure path */
+}
+
+static struct file_system_type mshare_fs = { /* filesystem type registered by mshare_init() */
+ .name = "msharefs", /* type name given to "mount -t" */
+ .init_fs_context = mshare_init_fs_context,
+ .kill_sb = kill_litter_super,
+};
+
+static int __init
+mshare_init(void)
+{
+	int ret;
+
+	/* Create the /sys/fs/mshare mount point, then register msharefs. */
+	ret = sysfs_create_mount_point(fs_kobj, "mshare");
+	if (ret)
+		return ret;
+
+	ret = register_filesystem(&mshare_fs);
+	if (ret) /* registration failed: remove the mount point again */
+		sysfs_remove_mount_point(fs_kobj, "mshare");
+	return ret;
+}
+
+fs_initcall(mshare_init); /* run mshare_init() as an fs-level initcall */
--
2.32.0