[PATCH] mm: extend memfd with ability to create secret memory

From: Mike Rapoport
Date: Mon Nov 18 2019 - 02:32:22 EST


Signed-off-by: Mike Rapoport <rppt@xxxxxxxxxxxxx>
---
include/linux/memfd.h | 9 ++
include/uapi/linux/magic.h | 1 +
include/uapi/linux/memfd.h | 6 +
mm/Kconfig | 3 +
mm/Makefile | 1 +
mm/memfd.c | 10 +-
mm/secretmem.c | 233 +++++++++++++++++++++++++++++++++++++
7 files changed, 261 insertions(+), 2 deletions(-)
create mode 100644 mm/secretmem.c

diff --git a/include/linux/memfd.h b/include/linux/memfd.h
index 4f1600413f91..d3ca7285f51a 100644
--- a/include/linux/memfd.h
+++ b/include/linux/memfd.h
@@ -13,4 +13,13 @@ static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned long a)
}
#endif

+#ifdef CONFIG_MEMFD_SECRETMEM
+extern struct file *secretmem_file_create(const char *name, unsigned int flags); /* defined in mm/secretmem.c */
+#else /* !CONFIG_MEMFD_SECRETMEM */
+static inline struct file *secretmem_file_create(const char *name, unsigned int flags)
+{
+ return ERR_PTR(-EINVAL); /* stub: mm/secretmem.c is not built, so creation always fails */
+}
+#endif /* CONFIG_MEMFD_SECRETMEM */
+
#endif /* __LINUX_MEMFD_H */
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 903cc2d2750b..3dad6208c8de 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -94,5 +94,6 @@
#define ZSMALLOC_MAGIC 0x58295829
#define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */
#define Z3FOLD_MAGIC 0x33
+#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */

#endif /* __LINUX_MAGIC_H__ */
diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h
index 7a8a26751c23..3320a79b638d 100644
--- a/include/uapi/linux/memfd.h
+++ b/include/uapi/linux/memfd.h
@@ -8,6 +8,12 @@
#define MFD_CLOEXEC 0x0001U
#define MFD_ALLOW_SEALING 0x0002U
#define MFD_HUGETLB 0x0004U
+#define MFD_SECRET 0x0008U /* create a secret memory file; meant to be combined only with MFD_CLOEXEC */
+
+/* ioctls for secret memory: pick the protection mode once, before mmap(). NOTE(review): the handler never reads the argument, so _IOW(..., unsigned long) looks unnecessary - confirm */
+#define MFD_SECRET_IOCTL '-'
+#define MFD_SECRET_EXCLUSIVE _IOW(MFD_SECRET_IOCTL, 0x13, unsigned long) /* pages are dropped from the kernel direct map */
+#define MFD_SECRET_UNCACHED _IOW(MFD_SECRET_IOCTL, 0x14, unsigned long) /* pages are mapped uncached */

/*
* Huge page size encoding when MFD_HUGETLB is specified, and a huge page
diff --git a/mm/Kconfig b/mm/Kconfig
index a5dae9a7eb51..aa828f240287 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -736,4 +736,7 @@ config ARCH_HAS_PTE_SPECIAL
config ARCH_HAS_HUGEPD
bool

+config MEMFD_SECRETMEM
+ def_bool MEMFD_CREATE && ARCH_HAS_SET_DIRECT_MAP
+
endmenu
diff --git a/mm/Makefile b/mm/Makefile
index d996846697ef..54cb8a60d698 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -107,3 +107,4 @@ obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
obj-$(CONFIG_ZONE_DEVICE) += memremap.o
obj-$(CONFIG_HMM_MIRROR) += hmm.o
obj-$(CONFIG_MEMFD_CREATE) += memfd.o
+obj-$(CONFIG_MEMFD_SECRETMEM) += secretmem.o
diff --git a/mm/memfd.c b/mm/memfd.c
index 2647c898990c..3e1cc37e0389 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -245,7 +245,8 @@ long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)

-#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB)
+#define MFD_SECRET_MASK (MFD_CLOEXEC | MFD_SECRET)
+#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB | MFD_SECRET)

SYSCALL_DEFINE2(memfd_create,
const char __user *, uname,
@@ -257,6 +258,9 @@ SYSCALL_DEFINE2(memfd_create,
char *name;
long len;

+	if ((flags & MFD_SECRET) && (flags & ~(unsigned int)MFD_SECRET_MASK))
+		return -EINVAL;	/* MFD_SECRET only combines with MFD_CLOEXEC */
+
if (!(flags & MFD_HUGETLB)) {
if (flags & ~(unsigned int)MFD_ALL_FLAGS)
return -EINVAL;
@@ -296,7 +300,9 @@ SYSCALL_DEFINE2(memfd_create,
goto err_name;
}

- if (flags & MFD_HUGETLB) {
+ if (flags & MFD_SECRET) {
+ file = secretmem_file_create(name, flags);
+ } else if (flags & MFD_HUGETLB) {
struct user_struct *user = NULL;

file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user,
diff --git a/mm/secretmem.c b/mm/secretmem.c
new file mode 100644
index 000000000000..e787b8dc925b
--- /dev/null
+++ b/mm/secretmem.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/printk.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/memfd.h>
+#include <linux/pseudo_fs.h>
+#include <linux/set_memory.h>
+#include <uapi/linux/memfd.h>
+#include <uapi/linux/magic.h>
+
+#include <asm/tlb.h>
+
+#define SECRETMEM_EXCLUSIVE 0x1
+#define SECRETMEM_UNCACHED 0x2
+
+struct secretmem_state { /* per-file state, reachable through file->private_data */
+ unsigned int mode; /* 0 until the ioctl sets SECRETMEM_EXCLUSIVE or SECRETMEM_UNCACHED */
+};
+
+/*
+ * Page fault handler for secretmem mappings.
+ *
+ * Looks up, or allocates, the page cache page backing vmf->pgoff and then
+ * applies the protection selected for the file: SECRETMEM_EXCLUSIVE drops
+ * the page from the kernel direct map, SECRETMEM_UNCACHED switches it to
+ * uncached.  The page is handed to the fault core through vmf->page.
+ *
+ * Returns 0 on success, or the VM_FAULT_* code produced by vmf_error()
+ * when allocation or the protection change fails.
+ */
+static vm_fault_t secretmem_fault(struct vm_fault *vmf)
+{
+	struct secretmem_state *state = vmf->vma->vm_file->private_data;
+	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+	pgoff_t offset = vmf->pgoff;
+	unsigned long addr;
+	struct page *page;
+	int err;
+
+	page = find_get_page(mapping, offset);
+	if (!page) {
+		/*
+		 * The page is marked uptodate and mapped into userspace
+		 * without ever being written, so it has to be allocated
+		 * zeroed or stale kernel data would leak to the mapper.
+		 */
+		page = pagecache_get_page(mapping, offset,
+					  FGP_CREAT|FGP_FOR_MMAP,
+					  vmf->gfp_mask | __GFP_ZERO);
+		if (!page)
+			return vmf_error(-ENOMEM);
+
+		__SetPageUptodate(page);
+	}
+
+	if (state->mode == SECRETMEM_EXCLUSIVE)
+		err = set_direct_map_invalid_noflush(page);
+	else if (state->mode == SECRETMEM_UNCACHED)
+		err = set_pages_array_uc(&page, 1);
+	else
+		BUG();	/* secretmem_mmap() rejects files without a mode */
+
+	if (err) {
+		/*
+		 * delete_from_page_cache() must be called with the page
+		 * locked, and only while the page still belongs to this
+		 * mapping.  The reference taken by the lookup above is
+		 * ours to drop.
+		 */
+		lock_page(page);
+		if (page->mapping == mapping)
+			delete_from_page_cache(page);
+		unlock_page(page);
+		put_page(page);
+		return vmf_error(err);
+	}
+
+	/* The helpers above do not flush; drop the stale kernel TLB entry. */
+	addr = (unsigned long)page_address(page);
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+	vmf->page = page;
+	return 0;
+}
+
+static void secretmem_close(struct vm_area_struct *vma) /* vm_ops->close: restore and drop every cached page of the file */
+{ /* NOTE(review): runs on each VMA close; other VMAs still mapping the file appear to lose their pages too - confirm */
+ struct secretmem_state *state = vma->vm_file->private_data;
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ struct page *page;
+ pgoff_t index;
+
+ xa_for_each(&mapping->i_pages, index, page) { /* walks the whole mapping, not only this VMA's range */
+ get_page(page); /* keep the page alive across its removal from the cache */
+ lock_page(page);
+
+ if (state->mode == SECRETMEM_EXCLUSIVE)
+ set_direct_map_default_noflush(page); /* undo set_direct_map_invalid_noflush() done at fault time */
+ else if (state->mode == SECRETMEM_UNCACHED)
+ set_pages_array_wb(&page, 1); /* undo set_pages_array_uc() done at fault time */
+ else
+ BUG(); /* secretmem_mmap() refuses files whose mode was never set */
+
+ __ClearPageDirty(page);
+ delete_from_page_cache(page);
+
+ unlock_page(page);
+ put_page(page); /* drops the reference taken at the top of the loop */
+ }
+}
+
+static const struct vm_operations_struct secretmem_vm_ops = { /* installed on the VMA by secretmem_mmap() */
+ .fault = secretmem_fault,
+ .close = secretmem_close,
+};
+
+/*
+ * ->mmap for secretmem files.
+ *
+ * A mapping is only allowed once a mode has been chosen through the ioctl.
+ * For SECRETMEM_UNCACHED the user mapping is made non-cached as well; both
+ * modes install secretmem_vm_ops.
+ *
+ * Returns 0 on success, -EINVAL when the mode is unset or unknown.
+ */
+static int secretmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	const struct secretmem_state *state = file->private_data;
+	const unsigned int mode = state->mode;	/* sampled once */
+
+	if (mode != SECRETMEM_EXCLUSIVE && mode != SECRETMEM_UNCACHED)
+		return -EINVAL;
+
+	if (mode == SECRETMEM_UNCACHED)
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_ops = &secretmem_vm_ops;
+	return 0;
+}
+
+/*
+ * ->unlocked_ioctl / ->compat_ioctl for secretmem files.
+ *
+ * Selects the protection mode of the file.  The mode can be set exactly
+ * once: a second call, or an unknown command, fails with -EINVAL.  The
+ * ioctl argument is not used.
+ */
+static long secretmem_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+	struct secretmem_state *state = file->private_data;
+	unsigned int new_mode;
+
+	/* Already configured: the mode is immutable afterwards. */
+	if (state->mode)
+		return -EINVAL;
+
+	if (cmd == MFD_SECRET_EXCLUSIVE)
+		new_mode = SECRETMEM_EXCLUSIVE;
+	else if (cmd == MFD_SECRET_UNCACHED)
+		new_mode = SECRETMEM_UNCACHED;
+	else
+		return -EINVAL;
+
+	state->mode = new_mode;
+	return 0;
+}
+
+/*
+ * ->release: free the secretmem_state that secretmem_file_create()
+ * attached to the file.  Always returns 0.
+ */
+static int secretmem_release(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
+
+/*
+ * File operations for files created by secretmem_file_create().  The table
+ * is referenced only from this file and is not declared in any header, so
+ * it gets internal linkage.
+ */
+static const struct file_operations secretmem_fops = {
+	.release	= secretmem_release,
+	.mmap		= secretmem_mmap,
+	.unlocked_ioctl	= secretmem_ioctl,
+	.compat_ioctl	= secretmem_ioctl,
+};
+
+static bool secretmem_isolate_page(struct page *page, isolate_mode_t mode) /* a_ops->isolate_page */
+{
+ return false; /* unconditionally refuse isolation of secretmem pages */
+}
+
+static int secretmem_migratepage(struct address_space *mapping, /* a_ops->migratepage */
+ struct page *newpage, struct page *page,
+ enum migrate_mode mode)
+{
+ return -EBUSY; /* unconditionally refuse to migrate secretmem pages */
+}
+
+static void secretmem_putback_page(struct page *page) /* a_ops->putback_page: intentionally empty; presumably never needed since secretmem_isolate_page() refuses every page */
+{
+}
+
+static const struct address_space_operations secretmem_aops = { /* attached to the inode mapping in secretmem_file_create() */
+ .migratepage = secretmem_migratepage,
+ .isolate_page = secretmem_isolate_page,
+ .putback_page = secretmem_putback_page,
+};
+
+static struct vfsmount *secretmem_mnt;
+
+/*
+ * Create a secret memory file for memfd_create(MFD_SECRET).
+ *
+ * Allocates an anonymous inode on the secretmem mount, a zeroed
+ * secretmem_state (mode unset) and a read-write pseudo file using
+ * secretmem_fops.  The mapping is marked unevictable and gets
+ * secretmem_aops.  @name and @flags are currently unused; the file is
+ * always named "secretmem".
+ *
+ * Returns the new file, or an ERR_PTR() on failure with everything
+ * allocated so far released.
+ */
+struct file *secretmem_file_create(const char *name, unsigned int flags)
+{
+	struct secretmem_state *state;
+	struct inode *inode;
+	struct file *file;
+
+	inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
+				 O_RDWR, &secretmem_fops);
+	if (IS_ERR(file)) {
+		kfree(state);
+		iput(inode);
+		return file;
+	}
+
+	mapping_set_unevictable(inode->i_mapping);
+	inode->i_mapping->a_ops = &secretmem_aops;
+	inode->i_mapping->private_data = state;
+
+	file->private_data = state;
+
+	return file;
+}
+
+/*
+ * ->init_fs_context: set up a pseudo filesystem context tagged with
+ * SECRETMEM_MAGIC.  Returns 0, or -ENOMEM if init_pseudo() fails.
+ */
+static int secretmem_init_fs_context(struct fs_context *fc)
+{
+	if (!init_pseudo(fc, SECRETMEM_MAGIC))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static struct file_system_type secretmem_fs = { /* internal pseudo filesystem, mounted by secretmem_init() */
+ .name = "secretmem",
+ .init_fs_context = secretmem_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
+
+/*
+ * Mount the internal secretmem filesystem and remember the mount in
+ * secretmem_mnt.  Registered as an fs_initcall.  Returns 0 on success or
+ * the error carried by kern_mount().
+ */
+static int secretmem_init(void)
+{
+	secretmem_mnt = kern_mount(&secretmem_fs);
+	if (IS_ERR(secretmem_mnt))
+		return PTR_ERR(secretmem_mnt);
+
+	return 0;
+}
+fs_initcall(secretmem_init);
--
2.24.0

--
Sincerely yours,
Mike.