[PATCH 3/6] intel_sgx: driver for Intel Software Guard Extensions

From: Jarkko Sakkinen
Date: Mon Apr 25 2016 - 13:38:34 EST


Intel(R) SGX is a set of CPU instructions that can be used by
applications to set aside private regions of code and data, called
enclaves. CPU access control prevents code outside an enclave from
accessing the memory inside it.

The Intel SGX driver provides an ioctl interface for loading and
initializing enclaves, and a pager in order to support oversubscription.

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@xxxxxxxxxxxxxxx>
---
arch/x86/include/asm/sgx.h | 4 +-
drivers/staging/Kconfig | 2 +
drivers/staging/Makefile | 1 +
drivers/staging/intel_sgx/Kconfig | 13 +
drivers/staging/intel_sgx/Makefile | 12 +
drivers/staging/intel_sgx/isgx.h | 238 +++++++
drivers/staging/intel_sgx/isgx_compat_ioctl.c | 179 +++++
drivers/staging/intel_sgx/isgx_ioctl.c | 926 ++++++++++++++++++++++++++
drivers/staging/intel_sgx/isgx_main.c | 369 ++++++++++
drivers/staging/intel_sgx/isgx_page_cache.c | 485 ++++++++++++++
drivers/staging/intel_sgx/isgx_user.h | 113 ++++
drivers/staging/intel_sgx/isgx_util.c | 334 ++++++++++
drivers/staging/intel_sgx/isgx_vma.c | 282 ++++++++
13 files changed, 2956 insertions(+), 2 deletions(-)
create mode 100644 drivers/staging/intel_sgx/Kconfig
create mode 100644 drivers/staging/intel_sgx/Makefile
create mode 100644 drivers/staging/intel_sgx/isgx.h
create mode 100644 drivers/staging/intel_sgx/isgx_compat_ioctl.c
create mode 100644 drivers/staging/intel_sgx/isgx_ioctl.c
create mode 100644 drivers/staging/intel_sgx/isgx_main.c
create mode 100644 drivers/staging/intel_sgx/isgx_page_cache.c
create mode 100644 drivers/staging/intel_sgx/isgx_user.h
create mode 100644 drivers/staging/intel_sgx/isgx_util.c
create mode 100644 drivers/staging/intel_sgx/isgx_vma.c

diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index ef9f20f..5e2692d 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -82,7 +82,7 @@ enum sgx_secinfo_masks {
};

struct sgx_pcmd {
- struct isgx_secinfo secinfo;
+ struct sgx_secinfo secinfo;
u64 enclave_id;
u8 reserved[40];
u8 mac[16];
@@ -185,7 +185,7 @@ static inline int __eadd(struct sgx_page_info *pginfo, void *epc)
return __encls(EADD, pginfo, epc, "d"(0));
}

-static inline int __einit(void *sigstruct, struct isgx_einittoken *einittoken,
+static inline int __einit(void *sigstruct, struct sgx_einittoken *einittoken,
void *secs)
{
return __encls_ret(EINIT, sigstruct, secs, einittoken);
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 5d3b86a..dc64d4b 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -110,4 +110,6 @@ source "drivers/staging/wilc1000/Kconfig"

source "drivers/staging/most/Kconfig"

+source "drivers/staging/intel_sgx/Kconfig"
+
endif # STAGING
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 30918ed..992377b 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -47,3 +47,4 @@ obj-$(CONFIG_FB_TFT) += fbtft/
obj-$(CONFIG_FSL_MC_BUS) += fsl-mc/
obj-$(CONFIG_WILC1000) += wilc1000/
obj-$(CONFIG_MOST) += most/
+obj-$(CONFIG_INTEL_SGX) += intel_sgx/
diff --git a/drivers/staging/intel_sgx/Kconfig b/drivers/staging/intel_sgx/Kconfig
new file mode 100644
index 0000000..74e3880
--- /dev/null
+++ b/drivers/staging/intel_sgx/Kconfig
@@ -0,0 +1,13 @@
+config INTEL_SGX
+ tristate "Intel(R) SGX Driver"
+ depends on X86
+ ---help---
+ Intel(R) SGX is a set of CPU instructions that can be used by
+ applications to set aside private regions of code and data. The code
+ outside the enclave is disallowed to access the memory inside the
+ enclave by the CPU access control.
+
+ The firmware uses PRMRR registers to reserve an area of physical memory
+ called Enclave Page Cache (EPC). There is a hardware unit in the
+ processor called Memory Encryption Engine. The MEE encrypts and decrypts
+ the EPC pages as they enter and leave the processor package.
diff --git a/drivers/staging/intel_sgx/Makefile b/drivers/staging/intel_sgx/Makefile
new file mode 100644
index 0000000..cc38853
--- /dev/null
+++ b/drivers/staging/intel_sgx/Makefile
@@ -0,0 +1,12 @@
+obj-$(CONFIG_INTEL_SGX) += intel_sgx.o
+
+intel_sgx-$(CONFIG_INTEL_SGX) += \
+ isgx_ioctl.o \
+ isgx_main.o \
+ isgx_page_cache.o \
+ isgx_util.o \
+ isgx_vma.o
+
+ifdef CONFIG_COMPAT
+intel_sgx-$(CONFIG_INTEL_SGX) += isgx_compat_ioctl.o
+endif
diff --git a/drivers/staging/intel_sgx/isgx.h b/drivers/staging/intel_sgx/isgx.h
new file mode 100644
index 0000000..ec3e649
--- /dev/null
+++ b/drivers/staging/intel_sgx/isgx.h
@@ -0,0 +1,238 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * Authors:
+ *
+ * Jarkko Sakkinen <jarkko.sakkinen@xxxxxxxxx>
+ * Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#ifndef __ARCH_ISGX_H__
+#define __ARCH_ISGX_H__
+
+#include "isgx_user.h"
+#include <asm/sgx.h>
+#include <linux/kref.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+
+/* Number of times to spin before going to sleep because of an interrupt
+ * storm.
+ */
+#define EINIT_SPIN_COUNT 20
+
+/* Number of tries in total before giving up with EINIT. During each try
+ * EINIT is called the number of times specified by EINIT_SPIN_COUNT.
+ */
+#define EINIT_TRY_COUNT 50
+
+/* Time to sleep between each try. */
+#define EINIT_BACKOFF_TIME 20
+
+#define ISGX_ENCLAVE_PAGE_TCS 0x1
+#define ISGX_ENCLAVE_PAGE_RESERVED 0x2
+
+/*
+ * Descriptor of a single EPC page.
+ *
+ * pa:        presumably the physical address of the page - TODO confirm
+ *            against the page cache code.
+ * free_list: list linkage; by its name it links the page into a free list
+ *            kept by the page cache - verify against isgx_page_cache.c.
+ */
+struct isgx_epc_page {
+ resource_size_t pa;
+ struct list_head free_list;
+};
+
+#define ISGX_VA_SLOT_COUNT 512
+
+/*
+ * Book-keeping for one VA page of an enclave.
+ *
+ * epc_page: the EPC page that was converted to a VA page with EPA.
+ * slots:    one bit per VA slot; a set bit means the slot is in use.
+ * list:     linkage in the va_pages list of the owning enclave.
+ */
+struct isgx_va_page {
+ struct isgx_epc_page *epc_page;
+ DECLARE_BITMAP(slots, ISGX_VA_SLOT_COUNT);
+ struct list_head list;
+};
+
+/**
+ * isgx_alloc_va_slot() - reserve a free slot in a VA page
+ *
+ * @page: VA page to allocate from
+ *
+ * Looks up the first clear bit in the slot bitmap and marks it as used. The
+ * slot index is converted to an offset by multiplying it by eight.
+ *
+ * Returns the offset of the reserved slot. When every slot is already taken
+ * nothing is reserved and ISGX_VA_SLOT_COUNT << 3 is returned, which callers
+ * compare against PAGE_SIZE.
+ */
+static inline unsigned int isgx_alloc_va_slot(struct isgx_va_page *page)
+{
+	int idx = find_first_zero_bit(page->slots, ISGX_VA_SLOT_COUNT);
+
+	if (idx >= ISGX_VA_SLOT_COUNT)
+		return idx << 3;
+
+	set_bit(idx, page->slots);
+	return idx << 3;
+}
+
+/**
+ * isgx_free_va_slot() - give a VA slot back to its VA page
+ *
+ * @page:   VA page that owns the slot
+ * @offset: offset of the slot as returned by isgx_alloc_va_slot()
+ *
+ * Converts the offset back to a slot index and clears the matching bit in
+ * the slot bitmap.
+ */
+static inline void isgx_free_va_slot(struct isgx_va_page *page,
+				     unsigned int offset)
+{
+	unsigned int idx = offset >> 3;
+
+	clear_bit(idx, page->slots);
+}
+
+/*
+ * Driver-side state of one enclave page.
+ *
+ * addr:      linear address of the page; also the key in the enclave's
+ *            red-black tree.
+ * flags:     ISGX_ENCLAVE_PAGE_* bits.
+ * epc_page:  EPC page currently backing this enclave page, if any.
+ * load_list: list linkage; presumably used for the enclave's load_list -
+ *            TODO confirm against the pager.
+ * enclave:   back pointer to the owning enclave.
+ * va_page:   VA page holding this page's VA slot.
+ * va_offset: offset of the VA slot inside va_page.
+ * pcmd:      paging crypto metadata; usage is outside this chunk.
+ * node:      red-black tree node linking the page into enclave_rb.
+ */
+struct isgx_enclave_page {
+ unsigned long addr;
+ unsigned int flags;
+ struct isgx_epc_page *epc_page;
+ struct list_head load_list;
+ struct isgx_enclave *enclave;
+ struct isgx_va_page *va_page;
+ unsigned int va_offset;
+ struct sgx_pcmd pcmd;
+ struct rb_node node;
+};
+
+#define ISGX_ENCLAVE_INITIALIZED 0x01
+#define ISGX_ENCLAVE_DEBUG 0x02
+#define ISGX_ENCLAVE_SECS_EVICTED 0x04
+#define ISGX_ENCLAVE_SUSPEND 0x08
+
+/*
+ * Tracks one VMA that has been bound to an enclave.
+ *
+ * vma:      the tracked VMA.
+ * vma_list: linkage in the vma_list of the enclave.
+ */
+struct isgx_vma {
+ struct vm_area_struct *vma;
+ struct list_head vma_list;
+};
+
+/*
+ * Per thread group context shared by all enclaves created by that group.
+ *
+ * tgid:         reference to the thread group's struct pid.
+ * epc_cnt:      presumably the number of EPC pages charged to the group -
+ *               TODO confirm against the page cache code.
+ * refcount:     one reference per enclave using the context; the context is
+ *               freed by release_tgid_ctx().
+ * enclave_list: enclaves that belong to this context.
+ * list:         linkage in the global isgx_tgid_ctx_list.
+ */
+struct isgx_tgid_ctx {
+ struct pid *tgid;
+ atomic_t epc_cnt;
+ struct kref refcount;
+ struct list_head enclave_list;
+ struct list_head list;
+};
+
+/*
+ * Driver-side representation of an enclave.
+ *
+ * flags:          ISGX_ENCLAVE_* state bits.
+ * owner:          group leader of the task that created the enclave.
+ * mm:             address space of the creating task.
+ * backing:        shmem file created together with the enclave; it is sized
+ *                 to the enclave plus one extra page.
+ * vma_list:       struct isgx_vma entries for VMAs bound to the enclave.
+ * load_list:      presumably the pages currently loaded into the EPC - TODO
+ *                 confirm against the pager.
+ * refcount:       released with isgx_enclave_release().
+ * base, size:     linear address range of the enclave.
+ * va_pages:       VA pages allocated for the enclave.
+ * enclave_rb:     red-black tree of enclave pages keyed by address.
+ * add_page_reqs:  queued page-add requests, processed by add_page_work.
+ * secs_child_cnt: usage is outside this chunk.
+ * secs_page:      enclave page descriptor for the SECS.
+ * tgid_ctx:       thread group context the enclave belongs to.
+ * enclave_list:   linkage in tgid_ctx->enclave_list.
+ */
+struct isgx_enclave {
+ /* the enclave lock */
+ struct mutex lock;
+ unsigned int flags;
+ struct task_struct *owner;
+ struct mm_struct *mm;
+ struct file *backing;
+ struct list_head vma_list;
+ struct list_head load_list;
+ struct kref refcount;
+ unsigned long base;
+ unsigned long size;
+ struct list_head va_pages;
+ struct rb_root enclave_rb;
+ struct list_head add_page_reqs;
+ struct work_struct add_page_work;
+ unsigned int secs_child_cnt;
+ struct isgx_enclave_page secs_page;
+ struct isgx_tgid_ctx *tgid_ctx;
+ struct list_head enclave_list;
+};
+
+extern struct workqueue_struct *isgx_add_page_wq;
+extern unsigned long isgx_epc_base;
+extern unsigned long isgx_epc_size;
+#ifdef CONFIG_X86_64
+extern void *isgx_epc_mem;
+#endif
+extern u64 isgx_enclave_size_max_32;
+extern u64 isgx_enclave_size_max_64;
+extern u64 isgx_xfrm_mask;
+extern u32 isgx_ssaframesize_tbl[64];
+
+extern struct vm_operations_struct isgx_vm_ops;
+extern atomic_t isgx_nr_pids;
+
+/*
+ * Message macros.
+ *
+ * Rate limited wrappers around pr_debug/pr_info/pr_warn/pr_err that prefix
+ * each message with "isgx: [<tgid>:0x<enclave base>] ". The encl argument is
+ * dereferenced, including encl->tgid_ctx, so the macros may only be used
+ * once the enclave has a tgid context.
+ */
+#define isgx_dbg(encl, fmt, ...) \
+ pr_debug_ratelimited("isgx: [%d:0x%p] " fmt, \
+ pid_nr((encl)->tgid_ctx->tgid), \
+ (void *)(encl)->base, ##__VA_ARGS__)
+#define isgx_info(encl, fmt, ...) \
+ pr_info_ratelimited("isgx: [%d:0x%p] " fmt, \
+ pid_nr((encl)->tgid_ctx->tgid), \
+ (void *)(encl)->base, ##__VA_ARGS__)
+#define isgx_warn(encl, fmt, ...) \
+ pr_warn_ratelimited("isgx: [%d:0x%p] " fmt, \
+ pid_nr((encl)->tgid_ctx->tgid), \
+ (void *)(encl)->base, ##__VA_ARGS__)
+#define isgx_err(encl, fmt, ...) \
+ pr_err_ratelimited("isgx: [%d:0x%p] " fmt, \
+ pid_nr((encl)->tgid_ctx->tgid), \
+ (void *)(encl)->base, ##__VA_ARGS__)
+
+/*
+ * Ioctl subsystem.
+ */
+
+long isgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
+#ifdef CONFIG_COMPAT
+long isgx_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
+#endif
+void isgx_add_page_worker(struct work_struct *work);
+
+/*
+ * Utility functions
+ */
+
+void *isgx_get_epc_page(struct isgx_epc_page *entry);
+void isgx_put_epc_page(void *epc_page_vaddr);
+struct page *isgx_get_backing(struct isgx_enclave *enclave,
+ struct isgx_enclave_page *entry);
+void isgx_put_backing(struct page *backing, bool write);
+void isgx_insert_pte(struct isgx_enclave *enclave,
+ struct isgx_enclave_page *enclave_page,
+ struct isgx_epc_page *epc_page,
+ struct vm_area_struct *vma);
+int isgx_eremove(struct isgx_epc_page *epc_page);
+int isgx_test_and_clear_young(struct isgx_enclave_page *page);
+struct isgx_vma *isgx_find_vma(struct isgx_enclave *enclave,
+ unsigned long addr);
+void isgx_zap_tcs_ptes(struct isgx_enclave *enclave,
+ struct vm_area_struct *vma);
+bool isgx_pin_mm(struct isgx_enclave *encl);
+void isgx_unpin_mm(struct isgx_enclave *encl);
+void isgx_invalidate(struct isgx_enclave *encl);
+int isgx_find_enclave(struct mm_struct *mm, unsigned long addr,
+ struct vm_area_struct **vma);
+struct isgx_enclave_page *isgx_enclave_find_page(struct isgx_enclave *enclave,
+ unsigned long enclave_la);
+void isgx_enclave_release(struct kref *ref);
+void release_tgid_ctx(struct kref *ref);
+
+/*
+ * Page cache subsystem.
+ */
+
+#define ISGX_NR_LOW_EPC_PAGES_DEFAULT 32
+#define ISGX_NR_SWAP_CLUSTER_MAX 16
+
+extern struct mutex isgx_tgid_ctx_mutex;
+extern struct list_head isgx_tgid_ctx_list;
+extern struct task_struct *kisgxswapd_tsk;
+
+/*
+ * Flags for isgx_alloc_epc_page().
+ * NOTE(review): ISGX_ALLOC_ATOMIC presumably forbids sleeping while waiting
+ * for a free EPC page - confirm against isgx_page_cache.c.
+ */
+enum isgx_alloc_flags {
+ ISGX_ALLOC_ATOMIC = BIT(0),
+};
+
+/*
+ * Flags for isgx_free_epc_page().
+ * NOTE(review): ISGX_FREE_SKIP_EREMOVE presumably frees the page without
+ * executing EREMOVE on it - confirm against isgx_page_cache.c.
+ */
+enum isgx_free_flags {
+ ISGX_FREE_SKIP_EREMOVE = BIT(0),
+};
+
+int kisgxswapd(void *p);
+int isgx_page_cache_init(resource_size_t start, unsigned long size);
+void isgx_page_cache_teardown(void);
+struct isgx_epc_page *isgx_alloc_epc_page(
+ struct isgx_tgid_ctx *tgid_epc_cnt, unsigned int flags);
+void isgx_free_epc_page(struct isgx_epc_page *entry,
+ struct isgx_enclave *encl,
+ unsigned int flags);
+
+#endif /* __ARCH_ISGX_H__ */
diff --git a/drivers/staging/intel_sgx/isgx_compat_ioctl.c b/drivers/staging/intel_sgx/isgx_compat_ioctl.c
new file mode 100644
index 0000000..e75b0cf
--- /dev/null
+++ b/drivers/staging/intel_sgx/isgx_compat_ioctl.c
@@ -0,0 +1,179 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * Authors:
+ *
+ * Jarkko Sakkinen <jarkko.sakkinen@xxxxxxxxx>
+ * Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
+ * Serge Ayoun <serge.ayoun@xxxxxxxxx>
+ * Shay Katz-zamir <shay.katz-zamir@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include "isgx.h"
+#include <linux/acpi.h>
+#include <linux/compat.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/suspend.h>
+
+#define ISGX32_IOC_ENCLAVE_CREATE \
+ _IOWR('p', 0x02, struct sgx_create_param32)
+#define ISGX32_IOC_ENCLAVE_ADD_PAGE \
+ _IOW('p', 0x03, struct sgx_add_param32)
+#define ISGX32_IOC_ENCLAVE_INIT \
+ _IOW('p', 0x04, struct sgx_init_param32)
+#define ISGX32_IOC_ENCLAVE_DESTROY \
+ _IOW('p', 0x06, struct sgx_destroy_param32)
+
+/* 32-bit layout of the enclave creation parameters. */
+struct sgx_create_param32 {
+	u32 secs;
+	u32 addr;
+};
+
+/*
+ * Compat entry point for enclave creation.
+ *
+ * Reads the 32-bit parameter block, builds the native parameter block in the
+ * compat user space scratch area and forwards it to isgx_ioctl(). On success
+ * the address stored by the native handler is written back into the caller's
+ * 32-bit block.
+ *
+ * Returns 0 on success, -EFAULT when user memory cannot be accessed, or the
+ * value returned by isgx_ioctl().
+ */
+static long enclave_create_compat(struct file *filep, unsigned int cmd,
+				  unsigned long arg)
+{
+	struct sgx_create_param32 p32;
+	struct sgx_create_param *native;
+	unsigned long enclave_addr;
+	int ret;
+
+	if (copy_from_user(&p32, (void *)arg, sizeof(p32)))
+		return -EFAULT;
+
+	native = compat_alloc_user_space(sizeof(*native));
+	if (!native)
+		return -EFAULT;
+
+	if (__put_user((void __user *)(unsigned long)p32.secs, &native->secs))
+		return -EFAULT;
+
+	ret = isgx_ioctl(filep, SGX_IOC_ENCLAVE_CREATE, (unsigned long)native);
+	if (ret)
+		return ret;
+
+	if (__get_user(enclave_addr, &native->addr))
+		return -EFAULT;
+
+	p32.addr = enclave_addr;
+
+	if (copy_to_user((void *)arg, &p32, sizeof(p32)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/* 32-bit layout of the add page parameters. */
+struct sgx_add_param32 {
+	u32 addr;
+	u32 user_addr;
+	u32 secinfo;
+	u32 flags;
+};
+
+/*
+ * Compat entry point for adding a page to an enclave.
+ *
+ * Widens every field of the 32-bit parameter block into a native parameter
+ * block placed in the compat user space scratch area and forwards the
+ * request to isgx_ioctl().
+ *
+ * Returns -EFAULT when user memory cannot be accessed, otherwise the value
+ * returned by isgx_ioctl().
+ */
+static long enclave_add_page_compat(struct file *filep, unsigned int cmd,
+				    unsigned long arg)
+{
+	struct sgx_add_param32 p32;
+	struct sgx_add_param *native;
+
+	if (copy_from_user(&p32, (void *)arg, sizeof(p32)))
+		return -EFAULT;
+
+	native = compat_alloc_user_space(sizeof(*native));
+	if (!native)
+		return -EFAULT;
+
+	if (__put_user((unsigned long)p32.addr, &native->addr))
+		return -EFAULT;
+	if (__put_user((unsigned long)p32.user_addr, &native->user_addr))
+		return -EFAULT;
+	if (__put_user((unsigned long)p32.secinfo, &native->secinfo))
+		return -EFAULT;
+	if (__put_user((unsigned long)p32.flags, &native->flags))
+		return -EFAULT;
+
+	return isgx_ioctl(filep, SGX_IOC_ENCLAVE_ADD_PAGE,
+			  (unsigned long)native);
+}
+
+/* 32-bit layout of the enclave initialization parameters. */
+struct sgx_init_param32 {
+	u32 addr;
+	u32 sigstruct;
+	u32 einittoken;
+};
+
+/*
+ * Compat entry point for enclave initialization.
+ *
+ * Widens the three 32-bit fields into a native parameter block placed in the
+ * compat user space scratch area and forwards the request to isgx_ioctl().
+ *
+ * Returns -EFAULT when user memory cannot be accessed, otherwise the value
+ * returned by isgx_ioctl().
+ */
+static long enclave_init_compat(struct file *filep, unsigned int cmd,
+				unsigned long arg)
+{
+	struct sgx_init_param32 p32;
+	struct sgx_init_param *native;
+
+	if (copy_from_user(&p32, (void *)arg, sizeof(p32)))
+		return -EFAULT;
+
+	native = compat_alloc_user_space(sizeof(*native));
+	if (!native)
+		return -EFAULT;
+
+	if (__put_user((void __user *)(unsigned long)p32.addr, &native->addr))
+		return -EFAULT;
+	if (__put_user((void __user *)(unsigned long)p32.sigstruct,
+		       &native->sigstruct))
+		return -EFAULT;
+	if (__put_user((void __user *)(unsigned long)p32.einittoken,
+		       &native->einittoken))
+		return -EFAULT;
+
+	return isgx_ioctl(filep, SGX_IOC_ENCLAVE_INIT, (unsigned long)native);
+}
+
+/* 32-bit layout of the enclave destroy parameters. */
+struct sgx_destroy_param32 {
+	u32 addr;
+};
+
+/*
+ * Compat entry point for destroying an enclave.
+ *
+ * Widens the 32-bit enclave address into a native parameter block placed in
+ * the compat user space scratch area and forwards the request to
+ * isgx_ioctl().
+ *
+ * Returns -EFAULT when user memory cannot be accessed, otherwise the value
+ * returned by isgx_ioctl().
+ */
+static long enclave_destroy_compat(struct file *filep, unsigned int cmd,
+				   unsigned long arg)
+{
+	struct sgx_destroy_param32 p32;
+	struct sgx_destroy_param *native;
+
+	if (copy_from_user(&p32, (void *)arg, sizeof(p32)))
+		return -EFAULT;
+
+	native = compat_alloc_user_space(sizeof(*native));
+	if (!native)
+		return -EFAULT;
+
+	if (__put_user((void __user *)(unsigned long)p32.addr, &native->addr))
+		return -EFAULT;
+
+	return isgx_ioctl(filep, SGX_IOC_ENCLAVE_DESTROY,
+			  (unsigned long)native);
+}
+
+/*
+ * Dispatches a 32-bit ioctl to the matching compat handler. Commands that
+ * are not recognized yield -EINVAL.
+ */
+long isgx_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+	long (*handler)(struct file *, unsigned int, unsigned long);
+
+	switch (cmd) {
+	case ISGX32_IOC_ENCLAVE_CREATE:
+		handler = enclave_create_compat;
+		break;
+	case ISGX32_IOC_ENCLAVE_ADD_PAGE:
+		handler = enclave_add_page_compat;
+		break;
+	case ISGX32_IOC_ENCLAVE_INIT:
+		handler = enclave_init_compat;
+		break;
+	case ISGX32_IOC_ENCLAVE_DESTROY:
+		handler = enclave_destroy_compat;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return handler(filep, cmd, arg);
+}
diff --git a/drivers/staging/intel_sgx/isgx_ioctl.c b/drivers/staging/intel_sgx/isgx_ioctl.c
new file mode 100644
index 0000000..9d8b36b
--- /dev/null
+++ b/drivers/staging/intel_sgx/isgx_ioctl.c
@@ -0,0 +1,926 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * Authors:
+ *
+ * Jarkko Sakkinen <jarkko.sakkinen@xxxxxxxxx>
+ * Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
+ * Serge Ayoun <serge.ayoun@xxxxxxxxx>
+ * Shay Katz-zamir <shay.katz-zamir@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include "isgx.h"
+#include <asm/mman.h>
+#include <linux/delay.h>
+#include <linux/file.h>
+#include <linux/highmem.h>
+#include <linux/ratelimit.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/hashtable.h>
+#include <linux/shmem_fs.h>
+
+/*
+ * A queued request to add one page to an enclave.
+ *
+ * list:         linkage in the enclave's add_page_reqs list.
+ * enclave_page: the enclave page the request is for.
+ * secinfo:      copy of the SECINFO supplied by the caller.
+ * flags:        flags copied from the add page parameters.
+ */
+struct isgx_add_page_req {
+ struct list_head list;
+ struct isgx_enclave_page *enclave_page;
+ struct sgx_secinfo secinfo;
+ u64 flags;
+};
+
+static u16 isgx_isvsvnle_min;
+atomic_t isgx_nr_pids = ATOMIC_INIT(0);
+
+/*
+ * Walks isgx_tgid_ctx_list and returns the context whose thread group id
+ * number equals that of @tgid, or NULL if there is none. The only caller in
+ * this file holds isgx_tgid_ctx_mutex around the call.
+ */
+static struct isgx_tgid_ctx *find_tgid_epc_cnt(struct pid *tgid)
+{
+	struct isgx_tgid_ctx *pos;
+	struct isgx_tgid_ctx *match = NULL;
+
+	list_for_each_entry(pos, &isgx_tgid_ctx_list, list) {
+		if (pid_nr(pos->tgid) == pid_nr(tgid)) {
+			match = pos;
+			break;
+		}
+	}
+
+	return match;
+}
+
+/*
+ * Attaches @enclave to the thread group context of the current task.
+ *
+ * An existing context gets an extra reference. Otherwise a new context is
+ * allocated, added to isgx_tgid_ctx_list and counted in isgx_nr_pids; the
+ * new context keeps the pid reference taken here.
+ *
+ * Returns 0 on success and -ENOMEM if a new context cannot be allocated.
+ */
+static int add_tgid_ctx(struct isgx_enclave *enclave)
+{
+	struct pid *tgid = get_pid(task_tgid(current));
+	struct isgx_tgid_ctx *ctx;
+	int ret = 0;
+
+	mutex_lock(&isgx_tgid_ctx_mutex);
+
+	ctx = find_tgid_epc_cnt(tgid);
+	if (!ctx) {
+		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+		if (!ctx) {
+			ret = -ENOMEM;
+			goto out_put_pid;
+		}
+
+		/* The new context takes over the pid reference. */
+		ctx->tgid = tgid;
+		kref_init(&ctx->refcount);
+		INIT_LIST_HEAD(&ctx->enclave_list);
+
+		list_add(&ctx->list, &isgx_tgid_ctx_list);
+		atomic_inc(&isgx_nr_pids);
+
+		enclave->tgid_ctx = ctx;
+
+		mutex_unlock(&isgx_tgid_ctx_mutex);
+		return 0;
+	}
+
+	kref_get(&ctx->refcount);
+	enclave->tgid_ctx = ctx;
+
+out_put_pid:
+	mutex_unlock(&isgx_tgid_ctx_mutex);
+	put_pid(tgid);
+	return ret;
+}
+
+/*
+ * kref release callback for struct isgx_tgid_ctx: unlinks the context from
+ * the global list, decrements isgx_nr_pids, drops the pid reference and
+ * frees the context.
+ */
+void release_tgid_ctx(struct kref *ref)
+{
+	struct isgx_tgid_ctx *ctx;
+
+	ctx = container_of(ref, struct isgx_tgid_ctx, refcount);
+
+	mutex_lock(&isgx_tgid_ctx_mutex);
+	list_del(&ctx->list);
+	atomic_dec(&isgx_nr_pids);
+	mutex_unlock(&isgx_tgid_ctx_mutex);
+
+	put_pid(ctx->tgid);
+	kfree(ctx);
+}
+
+/*
+ * Inserts @data into the red-black tree @root, ordered by page address.
+ *
+ * Returns 0 on success and -EFAULT if a page with the same address is
+ * already in the tree.
+ */
+static int enclave_rb_insert(struct rb_root *root,
+			     struct isgx_enclave_page *data)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct isgx_enclave_page *cur;
+
+	/* Descend to the leaf position where the new node belongs. */
+	while (*link) {
+		parent = *link;
+		cur = container_of(parent, struct isgx_enclave_page, node);
+
+		if (data->addr == cur->addr)
+			return -EFAULT;
+
+		if (data->addr < cur->addr)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	/* Link the node in and restore the tree invariants. */
+	rb_link_node(&data->node, parent, link);
+	rb_insert_color(&data->node, root);
+
+	return 0;
+}
+
+/* Allocates an EPC page and converts it into an empty VA page with EPA. */
+static struct isgx_va_page *alloc_va_page(struct isgx_enclave *enclave)
+{
+	struct isgx_va_page *va_page;
+	struct isgx_epc_page *epc_page;
+	void *vaddr;
+	int ret;
+
+	va_page = kzalloc(sizeof(*va_page), GFP_KERNEL);
+	if (!va_page)
+		return ERR_PTR(-ENOMEM);
+
+	epc_page = isgx_alloc_epc_page(NULL, 0);
+	if (IS_ERR(epc_page)) {
+		kfree(va_page);
+		return ERR_CAST(epc_page);
+	}
+
+	vaddr = isgx_get_epc_page(epc_page);
+	if (!vaddr) {
+		/* There is no error code to report for a failed mapping. */
+		isgx_warn(enclave, "kmap of a new VA page failed\n");
+		isgx_free_epc_page(epc_page, NULL, ISGX_FREE_SKIP_EREMOVE);
+		kfree(va_page);
+		return ERR_PTR(-EFAULT);
+	}
+
+	ret = __epa(vaddr);
+	isgx_put_epc_page(vaddr);
+
+	if (ret) {
+		isgx_warn(enclave, "EPA returned %d\n", ret);
+		isgx_free_epc_page(epc_page, NULL, 0);
+		kfree(va_page);
+		return ERR_PTR(-EFAULT);
+	}
+
+	va_page->epc_page = epc_page;
+	return va_page;
+}
+
+/**
+ * construct_enclave_page() - populate a new enclave page instance
+ * @enclave:	an enclave
+ * @entry:	the enclave page to be populated
+ * @addr:	the linear address of the enclave page
+ *
+ * Reserves a VA slot for the enclave page and fills out its enclave, VA page,
+ * VA offset and address fields. The slot is taken from the first VA page of
+ * the enclave that has room. If none has room, a new VA page is created and
+ * added to the enclave's list of VA pages.
+ *
+ * Returns 0 on success, -ENOMEM if the VA page descriptor cannot be
+ * allocated, the error from isgx_alloc_epc_page() if no EPC page is
+ * available, or -EFAULT if the new VA page cannot be mapped or EPA fails.
+ */
+static int construct_enclave_page(struct isgx_enclave *enclave,
+				  struct isgx_enclave_page *entry,
+				  unsigned long addr)
+{
+	struct isgx_va_page *va_page;
+	unsigned int va_offset = PAGE_SIZE;
+
+	list_for_each_entry(va_page, &enclave->va_pages, list) {
+		va_offset = isgx_alloc_va_slot(va_page);
+		if (va_offset < PAGE_SIZE)
+			break;
+	}
+
+	/*
+	 * Every existing VA page is full (or there is none yet), so va_page
+	 * does not point at a usable entry here and must be replaced.
+	 */
+	if (va_offset == PAGE_SIZE) {
+		va_page = alloc_va_page(enclave);
+		if (IS_ERR(va_page))
+			return PTR_ERR(va_page);
+
+		va_offset = isgx_alloc_va_slot(va_page);
+		list_add(&va_page->list, &enclave->va_pages);
+	}
+
+	entry->enclave = enclave;
+	entry->va_page = va_page;
+	entry->va_offset = va_offset;
+	entry->addr = addr;
+
+	return 0;
+}
+
+/*
+ * Looks up the enclave that covers @addr in the address space of the current
+ * task. On success a reference is taken on the enclave and it is stored in
+ * @enclave; the caller drops the reference with kref_put().
+ *
+ * Returns 0 on success or the error returned by isgx_find_enclave().
+ */
+static int get_enclave(unsigned long addr, struct isgx_enclave **enclave)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int ret;
+
+	down_read(&mm->mmap_sem);
+
+	ret = isgx_find_enclave(mm, addr, &vma);
+	if (ret)
+		goto out_unlock;
+
+	*enclave = vma->vm_private_data;
+	kref_get(&(*enclave)->refcount);
+
+out_unlock:
+	up_read(&mm->mmap_sem);
+
+	return ret;
+}
+
+/*
+ * Binds @enclave to the VMA that covers @addr in the address space of the
+ * current task and records the VMA in the enclave's vma_list.
+ *
+ * The VMA is only modified once the tracking structure has been allocated,
+ * so a failed allocation does not leave the VMA pointing at an enclave that
+ * the caller is about to release.
+ *
+ * Returns 0 on success, -ENOMEM if the tracking structure cannot be
+ * allocated, or the value returned by isgx_find_enclave() when it is
+ * anything other than -ENOENT.
+ *
+ * NOTE(review): a return value of 0 from isgx_find_enclave() presumably
+ * means the VMA already has an enclave; in that case this function returns
+ * 0 without binding anything, as before - confirm that this is intended.
+ */
+static int set_enclave(unsigned long addr, struct isgx_enclave *enclave)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct isgx_vma *evma;
+	int ret;
+
+	down_read(&mm->mmap_sem);
+
+	ret = isgx_find_enclave(mm, addr, &vma);
+	if (ret != -ENOENT)
+		goto out;
+
+	evma = kzalloc(sizeof(*evma), GFP_KERNEL);
+	if (!evma) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	vma->vm_private_data = enclave;
+
+	evma->vma = vma;
+	list_add_tail(&evma->vma_list, &enclave->vma_list);
+	ret = 0;
+out:
+	up_read(&mm->mmap_sem);
+	return ret;
+}
+
+/**
+ * validate_secs() - sanity check a SECS before it is given to ECREATE
+ * @secs:	kernel copy of the SECS
+ *
+ * Checks the attribute flags, the enclave size against the limit for the
+ * requested mode, the XFRM bits against the supported mask, the SSA frame
+ * size against what the enabled XFRM features need, that the size is a power
+ * of two and that all reserved fields are zero.
+ *
+ * Returns 0 if the SECS is acceptable and -EINVAL otherwise.
+ */
+static int validate_secs(const struct sgx_secs *secs)
+{
+	u32 needed_ssaframesize = 1;
+	u32 tmp;
+	int i;
+
+	if (secs->flags & SGX_SECS_A_RESERVED_MASK)
+		return -EINVAL;
+
+	if (secs->flags & SGX_SECS_A_MODE64BIT) {
+#ifdef CONFIG_X86_64
+		if (secs->size > isgx_enclave_size_max_64)
+			return -EINVAL;
+#else
+		return -EINVAL;
+#endif
+	} else {
+		/* On 64-bit architecture allow 32-bit enclaves only in
+		 * the compatibility mode.
+		 */
+#ifdef CONFIG_X86_64
+		if (!test_thread_flag(TIF_ADDR32))
+			return -EINVAL;
+#endif
+		if (secs->size > isgx_enclave_size_max_32)
+			return -EINVAL;
+	}
+
+	/* The two lowest XFRM bits are mandatory. */
+	if ((secs->xfrm & 0x3) != 0x3 || (secs->xfrm & ~isgx_xfrm_mask))
+		return -EINVAL;
+
+	/*
+	 * SKL quirk: bits 3 and 4 must be either both set or both clear.
+	 * Compare their truth values; the masked values themselves differ
+	 * (8 vs 16) even when both bits are set.
+	 */
+	if (!(secs->xfrm & BIT(3)) != !(secs->xfrm & BIT(4)))
+		return -EINVAL;
+
+	/* i goes up to 63, so the mask must be built from a 64-bit one. */
+	for (i = 2; i < 64; i++) {
+		tmp = isgx_ssaframesize_tbl[i];
+		if (((1ULL << i) & secs->xfrm) && (tmp > needed_ssaframesize))
+			needed_ssaframesize = tmp;
+	}
+
+	if (!secs->ssaframesize || !needed_ssaframesize ||
+	    needed_ssaframesize > secs->ssaframesize)
+		return -EINVAL;
+
+	/* Must be power of two */
+	if (secs->size == 0 || (secs->size & (secs->size - 1)) != 0)
+		return -EINVAL;
+
+	for (i = 0; i < SGX_SECS_RESERVED1_SIZE; i++)
+		if (secs->reserved1[i])
+			return -EINVAL;
+
+	for (i = 0; i < SGX_SECS_RESERVED2_SIZE; i++)
+		if (secs->reserved2[i])
+			return -EINVAL;
+
+	for (i = 0; i < SGX_SECS_RESERVED3_SIZE; i++)
+		if (secs->reserved3[i])
+			return -EINVAL;
+
+	for (i = 0; i < SGX_SECS_RESERVED4_SIZE; i++)
+		if (secs->reserved[i])
+			return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Handler for SGX_IOC_ENCLAVE_CREATE.
+ *
+ * Copies the SECS from user space and validates it, maps the enclave address
+ * range, creates the shmem backing file, allocates the enclave together with
+ * its SECS EPC page, executes ECREATE and finally binds the enclave to its
+ * VMA and to the thread group context. On success the enclave base address
+ * is stored in the parameter block.
+ *
+ * @arg is dereferenced directly; presumably isgx_ioctl() has already copied
+ * the parameter block into kernel memory - TODO confirm.
+ *
+ * Returns 0 on success. On failure returns a negative error code, or the
+ * non-zero value returned by ECREATE.
+ */
+static long isgx_ioctl_enclave_create(struct file *filep, unsigned int cmd,
+				      unsigned long arg)
+{
+	struct sgx_page_info pginfo;
+	struct sgx_secinfo secinfo;
+	struct sgx_create_param *createp = (struct sgx_create_param *)arg;
+	void *secs_la = createp->secs;
+	struct isgx_enclave *enclave = NULL;
+	struct sgx_secs *secs = NULL;
+	struct isgx_epc_page *secs_epc_page;
+	void *secs_vaddr = NULL;
+	struct file *backing;
+	long ret;
+
+	secs = kzalloc(sizeof(*secs), GFP_KERNEL);
+	if (!secs)
+		return -ENOMEM;
+
+	/* copy_from_user() returns a byte count, not an error code. */
+	if (copy_from_user((void *)secs, secs_la, sizeof(*secs))) {
+		kfree(secs);
+		return -EFAULT;
+	}
+
+	if (validate_secs(secs)) {
+		kfree(secs);
+		return -EINVAL;
+	}
+
+	secs->base = vm_mmap(filep, 0, secs->size,
+			     PROT_READ | PROT_WRITE | PROT_EXEC,
+			     MAP_SHARED, 0);
+	if (IS_ERR((void *)(unsigned long)secs->base)) {
+		ret = PTR_ERR((void *)(unsigned long)secs->base);
+		kfree(secs);
+		pr_warn("isgx: creating VMA for an enclave failed\n");
+		return ret;
+	}
+
+	backing = shmem_file_setup("dev/isgx", secs->size + PAGE_SIZE,
+				   VM_NORESERVE);
+	if (IS_ERR(backing)) {
+		ret = PTR_ERR(backing);
+		vm_munmap(secs->base, secs->size);
+		kfree(secs);
+
+		pr_warn("isgx: creating backing storage for enclave failed\n");
+		return ret;
+	}
+
+	enclave = kzalloc(sizeof(*enclave), GFP_KERNEL);
+	if (!enclave) {
+		/* No enclave owns the backing file yet, drop it here. */
+		fput(backing);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	kref_init(&enclave->refcount);
+	INIT_LIST_HEAD(&enclave->add_page_reqs);
+	INIT_LIST_HEAD(&enclave->va_pages);
+	INIT_LIST_HEAD(&enclave->vma_list);
+	INIT_LIST_HEAD(&enclave->load_list);
+	INIT_LIST_HEAD(&enclave->enclave_list);
+	mutex_init(&enclave->lock);
+	INIT_WORK(&enclave->add_page_work, isgx_add_page_worker);
+
+	enclave->owner = current->group_leader;
+	enclave->mm = current->mm;
+	enclave->base = secs->base;
+	enclave->size = secs->size;
+	enclave->backing = backing;
+
+	ret = add_tgid_ctx(enclave);
+	if (ret)
+		goto out;
+
+	secs_epc_page = isgx_alloc_epc_page(NULL, 0);
+	if (IS_ERR(secs_epc_page)) {
+		ret = PTR_ERR(secs_epc_page);
+		secs_epc_page = NULL;
+		goto out;
+	}
+
+	enclave->secs_page.epc_page = secs_epc_page;
+
+	/* The SECS page is keyed by the first address after the enclave. */
+	ret = construct_enclave_page(enclave, &enclave->secs_page,
+				     enclave->base + enclave->size);
+	if (ret)
+		goto out;
+
+	secs_vaddr = isgx_get_epc_page(enclave->secs_page.epc_page);
+	if (!secs_vaddr) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	pginfo.srcpge = (unsigned long)secs;
+	pginfo.linaddr = 0;
+	pginfo.secinfo = (unsigned long)&secinfo;
+	pginfo.secs = 0;
+	memset(&secinfo, 0, sizeof(secinfo));
+	ret = __ecreate((void *)&pginfo, secs_vaddr);
+
+	isgx_put_epc_page(secs_vaddr);
+
+	if (ret) {
+		isgx_info(enclave, "ECREATE returned %ld\n", ret);
+		goto out;
+	}
+
+	if (secs->flags & SGX_SECS_A_DEBUG)
+		enclave->flags |= ISGX_ENCLAVE_DEBUG;
+
+	ret = set_enclave(secs->base, enclave);
+	if (ret)
+		goto out;
+
+	mutex_lock(&isgx_tgid_ctx_mutex);
+	list_add_tail(&enclave->enclave_list, &enclave->tgid_ctx->enclave_list);
+	mutex_unlock(&isgx_tgid_ctx_mutex);
+out:
+	if (ret) {
+		vm_munmap(secs->base, secs->size);
+		if (enclave)
+			kref_put(&enclave->refcount, isgx_enclave_release);
+	} else {
+		createp->addr = (unsigned long)enclave->base;
+	}
+	kfree(secs);
+	return ret;
+}
+
+/*
+ * Checks a SECINFO supplied with an add page request: no reserved flag bits,
+ * write permission only together with read permission, page type TCS or REG,
+ * and an all-zero reserved area.
+ *
+ * Returns 0 if the SECINFO is acceptable and -EINVAL otherwise.
+ */
+static int validate_secinfo(struct sgx_secinfo *secinfo)
+{
+	u64 perm = secinfo->flags & ISGX_SECINFO_PERMISSION_MASK;
+	u64 page_type = secinfo->flags & ISGX_SECINFO_PAGE_TYPE_MASK;
+	int i;
+
+	if (secinfo->flags & ISGX_SECINFO_RESERVED_MASK)
+		return -EINVAL;
+
+	/* A writable page must also be readable. */
+	if ((perm & SGX_SECINFO_FL_W) && !(perm & SGX_SECINFO_FL_R))
+		return -EINVAL;
+
+	if (!(page_type == SGX_SECINFO_PT_TCS ||
+	      page_type == SGX_SECINFO_PT_REG))
+		return -EINVAL;
+
+	for (i = 0; i < sizeof(secinfo->reserved) / sizeof(u64); i++) {
+		if (secinfo->reserved[i])
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Checks a TCS page supplied by user space: FLAGS must be zero, OSSA,
+ * OFSBASE and OGSBASE must be page aligned, the low twelve bits of FSLIMIT
+ * and GSLIMIT must all be set, and the reserved area must be zero.
+ *
+ * Returns 0 if the TCS is acceptable and -EINVAL otherwise.
+ */
+static int validate_tcs(struct sgx_tcs *tcs)
+{
+	int i;
+
+	/* If FLAGS is not zero, ECALL will fail. */
+	if (tcs->flags != 0)
+		return -EINVAL;
+
+	if ((tcs->ossa & (PAGE_SIZE - 1)) ||
+	    (tcs->ofsbase & (PAGE_SIZE - 1)) ||
+	    (tcs->ogsbase & (PAGE_SIZE - 1)))
+		return -EINVAL;
+
+	if ((tcs->fslimit & 0xFFF) != 0xFFF)
+		return -EINVAL;
+
+	if ((tcs->gslimit & 0xFFF) != 0xFFF)
+		return -EINVAL;
+
+	for (i = 0; i < sizeof(tcs->reserved) / sizeof(u64); i++) {
+		if (tcs->reserved[i])
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Queues one page for addition to an enclave.
+ *
+ * The page contents are first copied from user space into a temporary page.
+ * The SECINFO is validated, and so is the page itself when it is a TCS. A VA
+ * slot is then reserved for the page. With the enclave lock held, the
+ * request is rejected if the enclave is already initialized or the address
+ * is already populated. Otherwise the data is copied to the enclave's
+ * backing storage, a request is appended to add_page_reqs (with a reference
+ * on the enclave) and the page is inserted into the enclave's tree. The
+ * worker is scheduled when the request list was empty.
+ *
+ * Returns 0 on success and a negative error code on failure. On failure
+ * inside the locked section the VA slot is released; @enclave_page itself is
+ * always left for the caller to free.
+ */
+static int __enclave_add_page(struct isgx_enclave *enclave,
+			      struct isgx_enclave_page *enclave_page,
+			      struct sgx_add_param *addp,
+			      struct sgx_secinfo *secinfo)
+{
+	u64 page_type = secinfo->flags & ISGX_SECINFO_PAGE_TYPE_MASK;
+	struct sgx_tcs *tcs;
+	struct page *backing;
+	struct isgx_add_page_req *req = NULL;
+	int ret;
+	int empty;
+	void *user_vaddr;
+	void *tmp_vaddr;
+	struct page *tmp_page;
+
+	tmp_page = alloc_page(GFP_HIGHUSER);
+	if (!tmp_page)
+		return -ENOMEM;
+
+	tmp_vaddr = kmap(tmp_page);
+	ret = copy_from_user((void *)tmp_vaddr, (void *)addp->user_addr,
+			     PAGE_SIZE);
+	kunmap(tmp_page);
+	if (ret) {
+		__free_page(tmp_page);
+		return -EFAULT;
+	}
+
+	if (validate_secinfo(secinfo)) {
+		__free_page(tmp_page);
+		return -EINVAL;
+	}
+
+	if (page_type == SGX_SECINFO_PT_TCS) {
+		tcs = (struct sgx_tcs *)kmap(tmp_page);
+		ret = validate_tcs(tcs);
+		kunmap(tmp_page);
+		if (ret) {
+			__free_page(tmp_page);
+			return ret;
+		}
+	}
+
+	ret = construct_enclave_page(enclave, enclave_page, addp->addr);
+	if (ret) {
+		/* Report the real cause (e.g. -ENOMEM), not -EINVAL. */
+		__free_page(tmp_page);
+		return ret;
+	}
+
+	mutex_lock(&enclave->lock);
+
+	if (enclave->flags & ISGX_ENCLAVE_INITIALIZED) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (isgx_enclave_find_page(enclave, addp->addr)) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	backing = isgx_get_backing(enclave, enclave_page);
+	if (IS_ERR((void *)backing)) {
+		ret = PTR_ERR((void *)backing);
+		goto out;
+	}
+
+	user_vaddr = kmap(backing);
+	tmp_vaddr = kmap(tmp_page);
+	memcpy(user_vaddr, tmp_vaddr, PAGE_SIZE);
+	kunmap(backing);
+	kunmap(tmp_page);
+
+	if (page_type == SGX_SECINFO_PT_TCS)
+		enclave_page->flags |= ISGX_ENCLAVE_PAGE_TCS;
+
+	memcpy(&req->secinfo, secinfo, sizeof(*secinfo));
+
+	req->enclave_page = enclave_page;
+	req->flags = addp->flags;
+	empty = list_empty(&enclave->add_page_reqs);
+	/* The queued request holds a reference on the enclave. */
+	kref_get(&enclave->refcount);
+	list_add_tail(&req->list, &enclave->add_page_reqs);
+	if (empty)
+		queue_work(isgx_add_page_wq, &enclave->add_page_work);
+
+	isgx_put_backing(backing, true /* write */);
+out:
+
+	if (ret) {
+		kfree(req);
+		isgx_free_va_slot(enclave_page->va_page,
+				  enclave_page->va_offset);
+	} else {
+		ret = enclave_rb_insert(&enclave->enclave_rb, enclave_page);
+		WARN_ON(ret);
+	}
+
+	mutex_unlock(&enclave->lock);
+	__free_page(tmp_page);
+	return ret;
+}
+
+/*
+ * Handler for SGX_IOC_ENCLAVE_ADD_PAGE.
+ *
+ * Rejects unaligned addresses, copies the SECINFO from user space, looks up
+ * the enclave that covers the address and checks that the address lies
+ * inside it. A new enclave page descriptor is then allocated and passed to
+ * __enclave_add_page(); it is freed again if that call fails.
+ *
+ * Returns 0 on success and a negative error code on failure.
+ */
+static long isgx_ioctl_enclave_add_page(struct file *filep, unsigned int cmd,
+					unsigned long arg)
+{
+	struct sgx_add_param *addp = (struct sgx_add_param *)arg;
+	struct isgx_enclave_page *page = NULL;
+	struct isgx_enclave *enclave;
+	struct sgx_secinfo secinfo;
+	int ret;
+
+	if (addp->addr & (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	if (copy_from_user(&secinfo, (void __user *)addp->secinfo,
+			   sizeof(secinfo)))
+		return -EFAULT;
+
+	ret = get_enclave(addp->addr, &enclave);
+	if (ret)
+		return ret;
+
+	if (addp->addr < enclave->base ||
+	    addp->addr > (enclave->base + enclave->size - PAGE_SIZE)) {
+		ret = -EINVAL;
+		goto out_put;
+	}
+
+	page = kzalloc(sizeof(*page), GFP_KERNEL);
+	if (!page) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
+
+	ret = __enclave_add_page(enclave, page, addp, &secinfo);
+
+out_put:
+	kref_put(&enclave->refcount, isgx_enclave_release);
+
+	/* page is still NULL on the early error paths; kfree() accepts that. */
+	if (ret)
+		kfree(page);
+
+	return ret;
+}
+
+/*
+ * Runs EINIT on the enclave.
+ *
+ * A valid token whose ISVSVNLE is below the highest one seen so far is
+ * rejected with SGX_LE_ROLLBACK. EINIT is retried while it reports
+ * SGX_UNMASKED_EVENT: up to EINIT_SPIN_COUNT times back to back, then a sleep
+ * of EINIT_BACKOFF_TIME, for at most EINIT_TRY_COUNT rounds. A pending signal
+ * after the sleep aborts with -EINTR.
+ *
+ * Returns 0 on success, in which case the enclave is marked initialized and
+ * the recorded minimum ISVSVNLE is raised if the token carries a higher one.
+ * Otherwise returns the EINIT error code, SGX_LE_ROLLBACK or -EINTR.
+ */
+static int __isgx_enclave_init(struct isgx_enclave *enclave,
+			       char *sigstruct,
+			       struct sgx_einittoken *einittoken)
+{
+	struct isgx_epc_page *secs_epc_page = enclave->secs_page.epc_page;
+	int ret = SGX_UNMASKED_EVENT;
+	void *secs_va = NULL;
+	int attempt;
+	int spin;
+
+	if (einittoken->valid && einittoken->isvsvnle < isgx_isvsvnle_min)
+		return SGX_LE_ROLLBACK;
+
+	for (attempt = 0; attempt < EINIT_TRY_COUNT; attempt++) {
+		for (spin = 0; spin < EINIT_SPIN_COUNT; spin++) {
+			mutex_lock(&enclave->lock);
+			secs_va = isgx_get_epc_page(secs_epc_page);
+			ret = __einit(sigstruct, einittoken, secs_va);
+			isgx_put_epc_page(secs_va);
+			mutex_unlock(&enclave->lock);
+
+			if (ret != SGX_UNMASKED_EVENT)
+				break;
+		}
+
+		/* Anything but an unmasked event is a final result. */
+		if (ret != SGX_UNMASKED_EVENT)
+			break;
+
+		msleep_interruptible(EINIT_BACKOFF_TIME);
+		if (signal_pending(current))
+			return -EINTR;
+	}
+
+	if (ret) {
+		isgx_info(enclave, "EINIT returned %d\n", ret);
+		return ret;
+	}
+
+	enclave->flags |= ISGX_ENCLAVE_INITIALIZED;
+
+	if (einittoken->isvsvnle > isgx_isvsvnle_min)
+		isgx_isvsvnle_min = einittoken->isvsvnle;
+
+	return 0;
+}
+
+/*
+ * Handler for SGX_IOC_ENCLAVE_INIT.
+ *
+ * Copies SIGSTRUCT and EINITTOKEN from user space into the two halves of a
+ * temporary page, looks up the enclave, waits for queued add page work to
+ * finish and then runs EINIT through __isgx_enclave_init().
+ *
+ * Returns 0 on success, -ENOMEM if the temporary page cannot be allocated,
+ * -EFAULT if user memory cannot be read, -EINVAL if the enclave is already
+ * initialized, or the value returned by get_enclave() or
+ * __isgx_enclave_init().
+ */
+static long isgx_ioctl_enclave_init(struct file *filep, unsigned int cmd,
+				    unsigned long arg)
+{
+	int ret;
+	struct sgx_init_param *initp = (struct sgx_init_param *)arg;
+	unsigned long enclave_id = initp->addr;
+	char *sigstruct;
+	struct sgx_einittoken *einittoken;
+	struct isgx_enclave *enclave;
+	struct page *initp_page;
+
+	initp_page = alloc_page(GFP_HIGHUSER);
+	if (!initp_page)
+		return -ENOMEM;
+
+	/* SIGSTRUCT goes to the first half of the page, the token to the
+	 * second half.
+	 */
+	sigstruct = kmap(initp_page);
+	einittoken = (struct sgx_einittoken *)
+		((unsigned long)sigstruct + PAGE_SIZE / 2);
+
+	/* copy_from_user() returns a byte count, not an error code. */
+	if (copy_from_user(sigstruct, initp->sigstruct, SIGSTRUCT_SIZE) ||
+	    copy_from_user(einittoken, initp->einittoken, EINITTOKEN_SIZE)) {
+		ret = -EFAULT;
+		goto out_free_page;
+	}
+
+	ret = get_enclave(enclave_id, &enclave);
+	if (ret)
+		goto out_free_page;
+
+	mutex_lock(&enclave->lock);
+	if (enclave->flags & ISGX_ENCLAVE_INITIALIZED) {
+		ret = -EINVAL;
+		mutex_unlock(&enclave->lock);
+		goto out;
+	}
+	mutex_unlock(&enclave->lock);
+
+	/* All queued pages must have been added before EINIT. */
+	flush_work(&enclave->add_page_work);
+
+	ret = __isgx_enclave_init(enclave, sigstruct, einittoken);
+out:
+	kref_put(&enclave->refcount, isgx_enclave_release);
+out_free_page:
+	kunmap(initp_page);
+	__free_page(initp_page);
+	return ret;
+}
+
+/*
+ * SGX_IOC_ENCLAVE_DESTROY handler: look the enclave up by the address in
+ * the (kernel copy of) struct sgx_destroy_param, unmap its whole address
+ * range and drop the reference taken by the lookup.
+ *
+ * Returns 0 on success or the error reported by get_enclave().
+ */
+static long isgx_ioctl_enclave_destroy(struct file *filep, unsigned int cmd,
+				       unsigned long arg)
+{
+	struct sgx_destroy_param *params = (struct sgx_destroy_param *)arg;
+	struct isgx_enclave *enclave;
+	int err;
+
+	err = get_enclave(params->addr, &enclave);
+	if (err)
+		return err;
+
+	vm_munmap(enclave->base, enclave->size);
+	kref_put(&enclave->refcount, isgx_enclave_release);
+
+	return 0;
+}
+
+/* Signature shared by all per-command ioctl handlers. */
+typedef long (*isgx_ioctl_t)(struct file *filep, unsigned int cmd,
+			     unsigned long arg);
+
+/*
+ * Top-level ioctl entry point.
+ *
+ * Selects the handler for @cmd, copies the _IOC_SIZE(cmd)-byte parameter
+ * block from user space into an on-stack buffer, and calls the handler with
+ * the address of that buffer as its argument. If the handler succeeds and
+ * the command has the IOC_OUT direction bit, the buffer is copied back.
+ *
+ * Returns -EINVAL for unknown commands, -EFAULT if either copy fails, and
+ * the handler's return value otherwise.
+ */
+long isgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+	void __user *uarg = (void __user *)arg;
+	isgx_ioctl_t handler;
+	char data[256];
+	long ret;
+
+	switch (cmd) {
+	case SGX_IOC_ENCLAVE_CREATE:
+		handler = isgx_ioctl_enclave_create;
+		break;
+	case SGX_IOC_ENCLAVE_ADD_PAGE:
+		handler = isgx_ioctl_enclave_add_page;
+		break;
+	case SGX_IOC_ENCLAVE_INIT:
+		handler = isgx_ioctl_enclave_init;
+		break;
+	case SGX_IOC_ENCLAVE_DESTROY:
+		handler = isgx_ioctl_enclave_destroy;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (copy_from_user(data, uarg, _IOC_SIZE(cmd)))
+		return -EFAULT;
+
+	ret = handler(filep, cmd, (unsigned long)((void *)data));
+
+	/* Nothing to hand back on failure or for write-only commands. */
+	if (ret || !(cmd & IOC_OUT))
+		return ret;
+
+	if (copy_to_user(uarg, data, _IOC_SIZE(cmd)))
+		return -EFAULT;
+
+	return ret;
+}
+
+/*
+ * Fill in a struct sgx_page_info for @epc_page and run __eadd() on it.
+ *
+ * The source data is taken from @backing (mapped with kmap_atomic() for the
+ * duration of the call), @secs_page is the enclave's SECS, @linaddr the
+ * enclave linear address of the page and @secinfo its SECINFO.
+ *
+ * Returns the value of __eadd().
+ */
+static int do_eadd(struct isgx_epc_page *secs_page,
+		   struct isgx_epc_page *epc_page,
+		   unsigned long linaddr,
+		   struct sgx_secinfo *secinfo,
+		   struct page *backing)
+{
+	struct sgx_page_info pginfo;
+	void *src;
+	void *secs;
+	void *epc;
+	int ret;
+
+	/* Mappings are released below in the reverse order of acquisition. */
+	src = kmap_atomic(backing);
+	secs = isgx_get_epc_page(secs_page);
+	epc = isgx_get_epc_page(epc_page);
+
+	pginfo.srcpge = (unsigned long)src;
+	pginfo.secs = (unsigned long)secs;
+	pginfo.linaddr = linaddr;
+	pginfo.secinfo = (unsigned long)secinfo;
+
+	ret = __eadd(&pginfo, epc);
+
+	isgx_put_epc_page(epc);
+	isgx_put_epc_page(secs);
+	kunmap_atomic(src);
+
+	return ret;
+}
+
+/*
+ * Run __eextend() over @epc_page in 0x100-byte steps covering 0x1000 bytes,
+ * stopping at the first step that fails.
+ *
+ * Returns 0 if every step succeeded, otherwise the first non-zero value
+ * returned by __eextend().
+ */
+static int do_eextend(struct isgx_epc_page *secs_page,
+		      struct isgx_epc_page *epc_page)
+{
+	unsigned long offset;
+	void *secs;
+	void *epc;
+	int ret;
+
+	for (offset = 0; offset < 0x1000; offset += 0x100) {
+		secs = isgx_get_epc_page(secs_page);
+		epc = isgx_get_epc_page(epc_page);
+
+		ret = __eextend(secs, (void *)((unsigned long)epc + offset));
+
+		isgx_put_epc_page(epc);
+		isgx_put_epc_page(secs);
+
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Carry out one queued add-page request: allocate an EPC page, map it into
+ * the enclave's VMA, EADD the contents from the backing page and, unless
+ * the request has SGX_ADD_SKIP_EEXTEND set, EEXTEND it.
+ *
+ * On success the EPC page is attached to the enclave page and the enclave
+ * page is appended to the enclave's load list.
+ *
+ * Returns true on success and false on any failure. On failure the EPC
+ * page is handed back to the allocator; EREMOVE is skipped for it unless
+ * EADD has already been attempted.
+ */
+static bool process_add_page_req(struct isgx_add_page_req *req)
+{
+	struct page *backing;
+	struct isgx_epc_page *epc_page;
+	struct isgx_enclave_page *enclave_page = req->enclave_page;
+	unsigned int flags = req->flags;
+	struct isgx_enclave *enclave = enclave_page->enclave;
+	unsigned free_flags = ISGX_FREE_SKIP_EREMOVE;
+	struct vm_area_struct *vma;
+	int ret;
+
+	epc_page = isgx_alloc_epc_page(enclave->tgid_ctx, 0);
+	if (IS_ERR(epc_page))
+		return false;
+
+	if (!isgx_pin_mm(enclave)) {
+		isgx_free_epc_page(epc_page, enclave, free_flags);
+		return false;
+	}
+
+	mutex_lock(&enclave->lock);
+
+	if (list_empty(&enclave->vma_list) ||
+	    isgx_find_enclave(enclave->mm, enclave_page->addr, &vma))
+		goto out;
+
+	backing = isgx_get_backing(enclave, enclave_page);
+	if (IS_ERR(backing))
+		goto out;
+
+	/* Do not race with do_exit() */
+	if (!atomic_read(&enclave->mm->mm_users)) {
+		isgx_put_backing(backing, 0);
+		goto out;
+	}
+
+	ret = vm_insert_pfn(vma, enclave_page->addr, PFN_DOWN(epc_page->pa));
+	if (ret) {
+		/* Drop the backing page reference on this path as well. */
+		isgx_put_backing(backing, 0);
+		goto out;
+	}
+
+	ret = do_eadd(enclave->secs_page.epc_page, epc_page,
+		      enclave_page->addr, &req->secinfo, backing);
+
+	isgx_put_backing(backing, 0);
+	/* From here on the EPC page has been through EADD: do not skip EREMOVE. */
+	free_flags = 0;
+	if (ret) {
+		isgx_dbg(enclave, "EADD returned %d\n", ret);
+		zap_vma_ptes(vma, enclave_page->addr, PAGE_SIZE);
+		goto out;
+	}
+
+	enclave->secs_child_cnt++;
+
+	if (!(flags & SGX_ADD_SKIP_EEXTEND)) {
+		ret = do_eextend(enclave->secs_page.epc_page, epc_page);
+		if (ret) {
+			isgx_dbg(enclave, "EEXTEND returned %d\n", ret);
+			zap_vma_ptes(vma, enclave_page->addr, PAGE_SIZE);
+			goto out;
+		}
+	}
+
+	isgx_test_and_clear_young(enclave_page);
+
+	enclave_page->epc_page = epc_page;
+	list_add_tail(&enclave_page->load_list, &enclave->load_list);
+
+	mutex_unlock(&enclave->lock);
+	isgx_unpin_mm(enclave);
+	return true;
+out:
+	isgx_free_epc_page(epc_page, enclave, free_flags);
+	mutex_unlock(&enclave->lock);
+	isgx_unpin_mm(enclave);
+	return false;
+}
+
+/*
+ * Work function draining an enclave's add_page_reqs list.
+ *
+ * Requests are dequeued one at a time under the enclave lock. Once a
+ * request fails, the remaining ones are still dequeued and freed but no
+ * longer processed. One enclave reference is dropped per request; the loop
+ * ends when that drop releases the enclave or the list has run empty.
+ */
+void isgx_add_page_worker(struct work_struct *work)
+{
+	struct isgx_enclave *enclave =
+		container_of(work, struct isgx_enclave, add_page_work);
+	struct isgx_add_page_req *req;
+	bool failed = false;
+	bool drained = false;
+
+	do {
+		schedule();
+
+		mutex_lock(&enclave->lock);
+		req = list_first_entry(&enclave->add_page_reqs,
+				       struct isgx_add_page_req, list);
+		list_del(&req->list);
+		drained = list_empty(&enclave->add_page_reqs);
+		mutex_unlock(&enclave->lock);
+
+		if (!failed && !process_add_page_req(req))
+			failed = true;
+
+		kfree(req);
+	} while (!kref_put(&enclave->refcount, isgx_enclave_release) &&
+		 !drained);
+}
diff --git a/drivers/staging/intel_sgx/isgx_main.c b/drivers/staging/intel_sgx/isgx_main.c
new file mode 100644
index 0000000..6554efc5
--- /dev/null
+++ b/drivers/staging/intel_sgx/isgx_main.c
@@ -0,0 +1,369 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * Authors:
+ *
+ * Jarkko Sakkinen <jarkko.sakkinen@xxxxxxxxx>
+ * Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
+ * Serge Ayoun <serge.ayoun@xxxxxxxxx>
+ * Shay Katz-zamir <shay.katz-zamir@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include "isgx.h"
+#include <linux/acpi.h>
+#include <linux/compat.h>
+#include <linux/file.h>
+#include <linux/highmem.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/suspend.h>
+#include <linux/hashtable.h>
+#include <linux/kthread.h>
+#include <linux/platform_device.h>
+
+/* Strings used for the load-time banner and the MODULE_* metadata below. */
+#define DRV_DESCRIPTION "Intel SGX Driver"
+#define DRV_VERSION "0.10"
+
+/*
+ * Default upper bounds for the enclave size: 64 GiB for 64-bit and 2 GiB
+ * for 32-bit address spaces. isgx_init_platform() replaces the values
+ * derived from these when CPUID reports non-zero limits.
+ */
+#define ENCLAVE_SIZE_MAX_64 (64ULL * 1024ULL * 1024ULL * 1024ULL)
+#define ENCLAVE_SIZE_MAX_32 (2ULL * 1024ULL * 1024ULL * 1024ULL)
+
+MODULE_DESCRIPTION(DRV_DESCRIPTION);
+MODULE_AUTHOR("Jarkko Sakkinen <jarkko.sakkinen@xxxxxxxxx>");
+MODULE_VERSION(DRV_VERSION);
+
+/*
+ * Global data.
+ */
+
+/* Workqueue for add-page requests; allocated in isgx_drv_init(). */
+struct workqueue_struct *isgx_add_page_wq;
+/* Physical base and size of the EPC, read from CPUID in isgx_init_platform(). */
+unsigned long isgx_epc_base;
+unsigned long isgx_epc_size;
+#ifdef CONFIG_X86_64
+/* Kernel mapping of the whole EPC (ioremap_cache() in isgx_drv_init()). */
+void *isgx_epc_mem;
+#endif
+/* Effective enclave size limits, see ENCLAVE_SIZE_MAX_* above. */
+u64 isgx_enclave_size_max_32 = ENCLAVE_SIZE_MAX_32;
+u64 isgx_enclave_size_max_64 = ENCLAVE_SIZE_MAX_64;
+/* Default 0x3; overwritten from CPUID when the CPU has OSXSAVE. */
+u64 isgx_xfrm_mask = 0x3;
+/* Per feature-bit frame size in pages, filled in by isgx_init_platform(). */
+u32 isgx_ssaframesize_tbl[64];
+
+/*
+ * Local data.
+ */
+
+/* Forward declarations: both callbacks are defined after isgx_fops. */
+static int isgx_mmap(struct file *file, struct vm_area_struct *vma);
+
+static unsigned long isgx_get_unmapped_area(struct file *file,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags);
+
+/*
+ * File operations of the SGX misc device: ioctl (plus the compat variant
+ * when CONFIG_COMPAT is set), mmap and a custom get_unmapped_area.
+ */
+static const struct file_operations isgx_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = isgx_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = isgx_compat_ioctl,
+#endif
+ .mmap = isgx_mmap,
+ .get_unmapped_area = isgx_get_unmapped_area,
+};
+
+/*
+ * Misc character device exposing the driver to user space as "sgx",
+ * readable and writable by everyone.
+ *
+ * The minor number is requested dynamically; leaving .minor unset would
+ * make misc_register() claim the fixed minor 0.
+ */
+static struct miscdevice isgx_dev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "sgx",
+	.fops	= &isgx_fops,
+	.mode	= S_IRUGO | S_IWUGO,
+};
+
+/*
+ * mmap handler: install the enclave VMA operations and mark the mapping as
+ * a non-expandable PFN/IO mapping. VM_RESERVED is added where it exists;
+ * otherwise VM_DONTDUMP is used in its place.
+ *
+ * Always returns 0.
+ */
+static int isgx_mmap(struct file *file, struct vm_area_struct *vma)
+{
+#ifdef VM_RESERVED
+	const unsigned long extra_flags =
+		VM_PFNMAP | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#else
+	const unsigned long extra_flags =
+		VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#endif
+
+	vma->vm_ops = &isgx_vm_ops;
+	vma->vm_flags |= extra_flags;
+
+	return 0;
+}
+
+/*
+ * Probe the CPU for SGX support and read the platform parameters from
+ * CPUID into the driver globals:
+ *
+ *  - isgx_xfrm_mask and isgx_ssaframesize_tbl[] (only with OSXSAVE),
+ *  - isgx_enclave_size_max_32/64 (only when CPUID reports a limit),
+ *  - isgx_epc_base and isgx_epc_size.
+ *
+ * Returns 0 on success and -ENODEV when the SGX leaf, the SGX feature bit,
+ * the SGX 1.0 instruction set or a usable EPC area is missing.
+ */
+static int isgx_init_platform(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	int i;
+
+	cpuid(0, &eax, &ebx, &ecx, &edx);
+	if (eax < SGX_CPUID) {
+		pr_err("isgx: CPUID is missing the SGX leaf instruction\n");
+		return -ENODEV;
+	}
+
+	if (!boot_cpu_has(X86_FEATURE_SGX)) {
+		pr_err("isgx: CPU is missing the SGX feature\n");
+		return -ENODEV;
+	}
+
+	cpuid_count(SGX_CPUID, 0x0, &eax, &ebx, &ecx, &edx);
+	if (!(eax & 1)) {
+		pr_err("isgx: CPU does not support the SGX 1.0 instruction set\n");
+		return -ENODEV;
+	}
+
+	if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+		cpuid_count(SGX_CPUID, 0x1, &eax, &ebx, &ecx, &edx);
+		isgx_xfrm_mask = (((u64)edx) << 32) + (u64)ecx;
+		for (i = 2; i < 64; i++) {
+			cpuid_count(0x0D, i, &eax, &ebx, &ecx, &edx);
+			/*
+			 * The mask is 64 bits wide and i reaches 63, so the
+			 * bit must be built as a 64-bit value; shifting a
+			 * plain int by 32 or more is undefined.
+			 */
+			if ((1ULL << i) & isgx_xfrm_mask)
+				isgx_ssaframesize_tbl[i] =
+					(168 + eax + ebx + PAGE_SIZE - 1) /
+					PAGE_SIZE;
+		}
+	}
+
+	cpuid_count(SGX_CPUID, 0x0, &eax, &ebx, &ecx, &edx);
+	if (edx & 0xFFFF) {
+#ifdef CONFIG_X86_64
+		isgx_enclave_size_max_64 = 2ULL << (edx & 0xFF);
+#endif
+		isgx_enclave_size_max_32 = 2ULL << ((edx >> 8) & 0xFF);
+	}
+
+	cpuid_count(SGX_CPUID, 0x2, &eax, &ebx, &ecx, &edx);
+
+	/* There should be at least one EPC area or something is wrong. */
+	if ((eax & 0xf) != 0x1)
+		return -ENODEV;
+
+	isgx_epc_base = (((u64)(ebx & 0xfffff)) << 32) +
+		(u64)(eax & 0xfffff000);
+	isgx_epc_size = (((u64)(edx & 0xfffff)) << 32) +
+		(u64)(ecx & 0xfffff000);
+
+	if (!isgx_epc_base)
+		return -ENODEV;
+
+	return 0;
+}
+
+/*
+ * Suspend callback: stop the swap thread, then invalidate every enclave of
+ * every TGID context and flag it with ISGX_ENCLAVE_SUSPEND.
+ *
+ * Always returns 0.
+ */
+static int isgx_pm_suspend(struct device *dev)
+{
+	struct isgx_tgid_ctx *ctx;
+	struct isgx_enclave *encl;
+
+	/*
+	 * The thread pointer is NULL or an error pointer when the thread
+	 * could not be (re)started earlier; kthread_stop() must not be
+	 * called on either.
+	 */
+	if (!IS_ERR_OR_NULL(kisgxswapd_tsk))
+		kthread_stop(kisgxswapd_tsk);
+	kisgxswapd_tsk = NULL;
+
+	list_for_each_entry(ctx, &isgx_tgid_ctx_list, list) {
+		list_for_each_entry(encl, &ctx->enclave_list, enclave_list) {
+			isgx_invalidate(encl);
+			encl->flags |= ISGX_ENCLAVE_SUSPEND;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Resume callback: restart the swap thread.
+ *
+ * Returns 0 on success or the error from kthread_run(). On failure
+ * kisgxswapd_tsk is left NULL rather than holding an error pointer, so
+ * that code testing the pointer for NULL never passes an error pointer to
+ * kthread_stop().
+ */
+static int isgx_pm_resume(struct device *dev)
+{
+	struct task_struct *tsk;
+
+	tsk = kthread_run(kisgxswapd, NULL, "kisgxswapd");
+	if (IS_ERR(tsk)) {
+		kisgxswapd_tsk = NULL;
+		pr_err("isgx: failed to restart kisgxswapd\n");
+		return PTR_ERR(tsk);
+	}
+
+	kisgxswapd_tsk = tsk;
+	return 0;
+}
+
+/* Power-management operations of the platform driver (suspend/resume only). */
+static SIMPLE_DEV_PM_OPS(isgx_drv_pm, isgx_pm_suspend, isgx_pm_resume);
+
+/*
+ * Bring the driver up for @dev: detect the platform, map the EPC (64-bit
+ * only), build the EPC page cache, create the add-page workqueue and
+ * register the misc device.
+ *
+ * Returns 0 on success or a negative errno. On failure everything set up
+ * so far is undone again, including the page cache and its swap thread.
+ */
+static int isgx_drv_init(struct device *dev)
+{
+	unsigned int wq_flags;
+	int ret;
+
+	pr_info("isgx: " DRV_DESCRIPTION " v" DRV_VERSION "\n");
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return -ENODEV;
+
+	ret = isgx_init_platform();
+	if (ret)
+		return ret;
+
+	pr_info("isgx: EPC memory range 0x%lx-0x%lx\n", isgx_epc_base,
+		isgx_epc_base + isgx_epc_size);
+
+#ifdef CONFIG_X86_64
+	isgx_epc_mem = ioremap_cache(isgx_epc_base, isgx_epc_size);
+	if (!isgx_epc_mem)
+		return -ENOMEM;
+#endif
+
+	ret = isgx_page_cache_init(isgx_epc_base, isgx_epc_size);
+	if (ret)
+		goto out_iounmap;
+
+	wq_flags = WQ_UNBOUND | WQ_FREEZABLE;
+	/* Guard and use must name the same symbol (guard was misspelled). */
+#ifdef WQ_NON_REENTRANT
+	wq_flags |= WQ_NON_REENTRANT;
+#endif
+	isgx_add_page_wq = alloc_workqueue("isgx-add-page-wq", wq_flags, 1);
+	if (!isgx_add_page_wq) {
+		pr_err("isgx: alloc_workqueue() failed\n");
+		ret = -ENOMEM;
+		goto out_page_cache;
+	}
+
+	isgx_dev.parent = dev;
+	ret = misc_register(&isgx_dev);
+	if (ret) {
+		pr_err("isgx: misc_register() failed\n");
+		goto out_workqueue;
+	}
+
+	return 0;
+out_workqueue:
+	destroy_workqueue(isgx_add_page_wq);
+out_page_cache:
+	/* Stops the swap thread and frees the EPC page descriptors. */
+	isgx_page_cache_teardown();
+out_iounmap:
+#ifdef CONFIG_X86_64
+	iounmap(isgx_epc_mem);
+#endif
+	return ret;
+}
+
+/*
+ * Platform driver probe: verify that the CPU is an Intel CPU with SGX 1.0,
+ * read the XFRM mask, frame-size table and enclave size limits from CPUID,
+ * then hand over to isgx_drv_init().
+ *
+ * NOTE(review): these checks duplicate the first part of
+ * isgx_init_platform(), which isgx_drv_init() runs again.
+ *
+ * Returns -ENODEV if SGX is not usable, otherwise the result of
+ * isgx_drv_init().
+ */
+static int isgx_drv_probe(struct platform_device *pdev)
+{
+	unsigned int eax, ebx, ecx, edx;
+	int i;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return -ENODEV;
+
+	cpuid(0, &eax, &ebx, &ecx, &edx);
+	if (eax < SGX_CPUID) {
+		pr_err("isgx: CPUID is missing the SGX leaf instruction\n");
+		return -ENODEV;
+	}
+
+	if (!boot_cpu_has(X86_FEATURE_SGX)) {
+		pr_err("isgx: CPU is missing the SGX feature\n");
+		return -ENODEV;
+	}
+
+	cpuid_count(SGX_CPUID, 0x0, &eax, &ebx, &ecx, &edx);
+	if (!(eax & 1)) {
+		pr_err("isgx: CPU does not support the SGX 1.0 instruction set\n");
+		return -ENODEV;
+	}
+
+	if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+		cpuid_count(SGX_CPUID, 0x1, &eax, &ebx, &ecx, &edx);
+		isgx_xfrm_mask = (((u64)edx) << 32) + (u64)ecx;
+		for (i = 2; i < 64; i++) {
+			cpuid_count(0x0D, i, &eax, &ebx, &ecx, &edx);
+			/*
+			 * 64-bit shift: i goes up to 63 and shifting a plain
+			 * int that far is undefined.
+			 */
+			if ((1ULL << i) & isgx_xfrm_mask)
+				isgx_ssaframesize_tbl[i] =
+					(168 + eax + ebx + PAGE_SIZE - 1) /
+					PAGE_SIZE;
+		}
+	}
+
+	cpuid_count(SGX_CPUID, 0x0, &eax, &ebx, &ecx, &edx);
+	if (edx & 0xFFFF) {
+#ifdef CONFIG_X86_64
+		isgx_enclave_size_max_64 = 2ULL << (edx & 0xFF);
+#endif
+		isgx_enclave_size_max_32 = 2ULL << ((edx >> 8) & 0xFF);
+	}
+
+	return isgx_drv_init(&pdev->dev);
+}
+
+/*
+ * Platform driver remove: unregister the misc device, destroy the add-page
+ * workqueue, tear down the EPC page cache and finally unmap the EPC.
+ *
+ * The page cache teardown stops the swap thread, so it has to run before
+ * the EPC mapping that thread works on goes away.
+ *
+ * Always returns 0.
+ */
+static int isgx_drv_remove(struct platform_device *pdev)
+{
+	misc_deregister(&isgx_dev);
+	destroy_workqueue(isgx_add_page_wq);
+	isgx_page_cache_teardown();
+#ifdef CONFIG_X86_64
+	iounmap(isgx_epc_mem);
+#endif
+
+	return 0;
+}
+
+/* Platform driver bound by name to the "intel_sgx" device created in isgx_init(). */
+static struct platform_driver isgx_drv = {
+ .probe = isgx_drv_probe,
+ .remove = isgx_drv_remove,
+ .driver = {
+ .name = "intel_sgx",
+ .pm = &isgx_drv_pm,
+ },
+};
+
+/* The platform device registered by isgx_init(); unregistered in isgx_exit(). */
+static struct platform_device *isgx_pdev;
+
+/*
+ * Module entry point: register the platform driver and then a matching
+ * "intel_sgx" platform device, remembering the device in isgx_pdev.
+ *
+ * Returns 0 on success or a negative errno; if the device cannot be
+ * registered the driver is unregistered again.
+ */
+static int __init isgx_init(void)
+{
+	struct platform_device *dev;
+	int err;
+
+	err = platform_driver_register(&isgx_drv);
+	if (err < 0)
+		return err;
+
+	dev = platform_device_register_simple("intel_sgx", -1, NULL, 0);
+	if (!IS_ERR(dev)) {
+		isgx_pdev = dev;
+		return 0;
+	}
+
+	platform_driver_unregister(&isgx_drv);
+	return PTR_ERR(dev);
+}
+
+/*
+ * Module exit point: undo isgx_init() in reverse order, i.e. unregister
+ * the platform device first and the platform driver second.
+ */
+static void __exit isgx_exit(void)
+{
+ platform_device_unregister(isgx_pdev);
+ platform_driver_unregister(&isgx_drv);
+}
+
+/*
+ * Pick an address for an enclave mapping of @len bytes.
+ *
+ * @len must be a power of two of at least two pages and must not exceed
+ * the enclave size limit that applies to the caller. Twice the requested
+ * length is reserved through the mm's own get_unmapped_area() and the
+ * result is rounded up to a multiple of @len, so the returned address is
+ * naturally aligned to the enclave size.
+ *
+ * Returns the aligned address, -EINVAL for an unacceptable @len, or the
+ * error value produced by the underlying get_unmapped_area().
+ */
+static unsigned long isgx_get_unmapped_area(struct file *file,
+					    unsigned long addr,
+					    unsigned long len,
+					    unsigned long pgoff,
+					    unsigned long flags)
+{
+	unsigned long align_mask = len - 1;
+
+	if (len < 2 * PAGE_SIZE || (len & align_mask))
+		return -EINVAL;
+
+#ifdef CONFIG_X86_64
+	/*
+	 * On 64-bit, only tasks running in 32-bit compatibility mode are
+	 * held to the 32-bit enclave limit.
+	 */
+	if (len > isgx_enclave_size_max_32 && test_thread_flag(TIF_ADDR32))
+		return -EINVAL;
+
+	if (len > isgx_enclave_size_max_64)
+		return -EINVAL;
+#else
+	if (len > isgx_enclave_size_max_32)
+		return -EINVAL;
+#endif
+
+	addr = current->mm->get_unmapped_area(file, addr, 2 * len, pgoff,
+					      flags);
+	if (IS_ERR_VALUE(addr))
+		return addr;
+
+	return (addr + align_mask) & ~align_mask;
+}
+
+/* Module entry/exit hooks and license declaration. */
+module_init(isgx_init);
+module_exit(isgx_exit);
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/intel_sgx/isgx_page_cache.c b/drivers/staging/intel_sgx/isgx_page_cache.c
new file mode 100644
index 0000000..f0224e8
--- /dev/null
+++ b/drivers/staging/intel_sgx/isgx_page_cache.c
@@ -0,0 +1,485 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * Authors:
+ *
+ * Jarkko Sakkinen <jarkko.sakkinen@xxxxxxxxx>
+ * Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
+ * Serge Ayoun <serge.ayoun@xxxxxxxxx>
+ * Shay Katz-zamir <shay.katz-zamir@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include "isgx.h"
+#include <linux/freezer.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/ratelimit.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+/* List of free EPC page descriptors and the spinlock guarding it. */
+static LIST_HEAD(isgx_free_list);
+static DEFINE_SPINLOCK(isgx_free_list_lock);
+
+/* All TGID contexts; walked by the swapper and by the suspend callback. */
+LIST_HEAD(isgx_tgid_ctx_list);
+/* mutex for the TGID list */
+DEFINE_MUTEX(isgx_tgid_ctx_mutex);
+/* Page accounting; the counters are updated under isgx_free_list_lock. */
+static unsigned int isgx_nr_total_epc_pages;
+static unsigned int isgx_nr_free_epc_pages;
+/* Below the low mark the allocator wakes the swap thread ... */
+static unsigned int isgx_nr_low_epc_pages = ISGX_NR_LOW_EPC_PAGES_DEFAULT;
+/* ... which swaps while the free count is below the high mark (2 * low). */
+static unsigned int isgx_nr_high_epc_pages;
+/* The swap thread and the wait queue it sleeps on when there is no work. */
+struct task_struct *kisgxswapd_tsk;
+static DECLARE_WAIT_QUEUE_HEAD(kisgxswapd_waitq);
+
+/*
+ * Pick a TGID context to swap pages from.
+ *
+ * Up to @nr_to_scan contexts are examined round-robin: the head of the
+ * global list is taken and rotated to the tail, and the first context that
+ * has at least one enclave and whose reference count can still be raised
+ * is chosen.
+ *
+ * Returns the chosen context with an extra reference held, or NULL when
+ * the list is empty or no examined context qualified.
+ */
+static struct isgx_tgid_ctx *isolate_tgid_ctx(unsigned long nr_to_scan)
+{
+	struct isgx_tgid_ctx *found = NULL;
+	struct isgx_tgid_ctx *cand;
+	int scanned;
+
+	mutex_lock(&isgx_tgid_ctx_mutex);
+
+	if (list_empty(&isgx_tgid_ctx_list))
+		goto unlock;
+
+	for (scanned = 0; scanned < nr_to_scan; scanned++) {
+		cand = list_first_entry(&isgx_tgid_ctx_list,
+					struct isgx_tgid_ctx,
+					list);
+
+		/* Rotate, so the next iteration sees a different context. */
+		list_move_tail(&cand->list, &isgx_tgid_ctx_list);
+
+		if (list_empty(&cand->enclave_list))
+			continue;
+
+		if (!kref_get_unless_zero(&cand->refcount))
+			continue;
+
+		found = cand;
+		break;
+	}
+
+unlock:
+	mutex_unlock(&isgx_tgid_ctx_mutex);
+
+	return found;
+}
+
+/*
+ * Pick an enclave of @ctx to swap pages from.
+ *
+ * Up to @nr_to_scan enclaves are examined round-robin: the head of the
+ * context's enclave list is taken and rotated to the tail, and the first
+ * enclave that has a non-empty load list and whose reference count can
+ * still be raised is chosen.
+ *
+ * Returns the chosen enclave with an extra reference held, or NULL when
+ * the context has no enclaves or no examined enclave qualified.
+ */
+static struct isgx_enclave *isolate_enclave(struct isgx_tgid_ctx *ctx,
+					    unsigned long nr_to_scan)
+{
+	struct isgx_enclave *found = NULL;
+	struct isgx_enclave *cand;
+	int scanned;
+
+	mutex_lock(&isgx_tgid_ctx_mutex);
+
+	if (list_empty(&ctx->enclave_list))
+		goto unlock;
+
+	for (scanned = 0; scanned < nr_to_scan; scanned++) {
+		cand = list_first_entry(&ctx->enclave_list,
+					struct isgx_enclave,
+					enclave_list);
+
+		/* Rotate, so the next iteration sees a different enclave. */
+		list_move_tail(&cand->enclave_list, &ctx->enclave_list);
+
+		if (list_empty(&cand->load_list))
+			continue;
+
+		if (!kref_get_unless_zero(&cand->refcount))
+			continue;
+
+		found = cand;
+		break;
+	}
+
+unlock:
+	mutex_unlock(&isgx_tgid_ctx_mutex);
+
+	return found;
+}
+
+/*
+ * Move up to @nr_to_scan pages from the head of @encl's load list to @dst.
+ *
+ * A page that is not yet reserved gets ISGX_ENCLAVE_PAGE_RESERVED set and
+ * is moved to @dst; a page that is already reserved is rotated to the tail
+ * of the load list instead. Runs under the enclave lock.
+ */
+static void sgx_isolate_pages(struct isgx_enclave *encl,
+			      struct list_head *dst,
+			      unsigned long nr_to_scan)
+{
+	struct isgx_enclave_page *page;
+	int scanned;
+
+	mutex_lock(&encl->lock);
+
+	for (scanned = 0;
+	     scanned < nr_to_scan && !list_empty(&encl->load_list);
+	     scanned++) {
+		page = list_first_entry(&encl->load_list,
+					struct isgx_enclave_page,
+					load_list);
+
+		if (page->flags & ISGX_ENCLAVE_PAGE_RESERVED) {
+			list_move_tail(&page->load_list, &encl->load_list);
+			continue;
+		}
+
+		page->flags |= ISGX_ENCLAVE_PAGE_RESERVED;
+		list_move_tail(&page->load_list, dst);
+	}
+
+	mutex_unlock(&encl->lock);
+}
+
+/*
+ * Deliberately empty IPI callback. sgx_write_pages() passes it to
+ * smp_call_function() before retrying an EWB that reported
+ * SGX_NOT_TRACKED.
+ * NOTE(review): presumably the interrupt itself is what matters here
+ * (kicking threads out of the enclave) - confirm.
+ */
+static void isgx_ipi_cb(void *info)
+{
+}
+
+/* Run __eblock() on @epc_page; any non-zero result is treated as a bug. */
+static void do_eblock(struct isgx_epc_page *epc_page)
+{
+	void *epc = isgx_get_epc_page(epc_page);
+
+	BUG_ON(__eblock((unsigned long)epc));
+	isgx_put_epc_page(epc);
+}
+
+/* Run __etrack() on @epc_page; any non-zero result is treated as a bug. */
+static void do_etrack(struct isgx_epc_page *epc_page)
+{
+	void *secs = isgx_get_epc_page(epc_page);
+
+	BUG_ON(__etrack(secs));
+	isgx_put_epc_page(secs);
+}
+
+/*
+ * Write @enclave_page out of the EPC into @backing with __ewb(), using the
+ * page's slot (va_offset) in its version-array page.
+ *
+ * Returns the value of __ewb(). Every non-zero result other than
+ * SGX_NOT_TRACKED is also logged as an error.
+ */
+static int do_ewb(struct isgx_enclave *enclave,
+		  struct isgx_enclave_page *enclave_page,
+		  struct page *backing)
+{
+	struct sgx_page_info pginfo;
+	void *dst;
+	void *epc;
+	void *va;
+	void *va_slot;
+	int ret;
+
+	/* Mappings are released below in the reverse order of acquisition. */
+	dst = kmap_atomic(backing);
+	epc = isgx_get_epc_page(enclave_page->epc_page);
+	va = isgx_get_epc_page(enclave_page->va_page->epc_page);
+	va_slot = (void *)((unsigned long)va + enclave_page->va_offset);
+
+	pginfo.srcpge = (unsigned long)dst;
+	pginfo.pcmd = (unsigned long)&enclave_page->pcmd;
+	pginfo.linaddr = 0;
+	pginfo.secs = 0;
+
+	ret = __ewb(&pginfo, epc, va_slot);
+
+	isgx_put_epc_page(va);
+	isgx_put_epc_page(epc);
+	kunmap_atomic(dst);
+
+	if (ret != 0 && ret != SGX_NOT_TRACKED)
+		isgx_err(enclave, "EWB returned %d\n", ret);
+
+	return ret;
+}
+
+/*
+ * Give the EPC page behind @entry back to the allocator (passing @encl and
+ * @flags through to isgx_free_epc_page()), then detach it from @entry and
+ * clear the entry's ISGX_ENCLAVE_PAGE_RESERVED flag.
+ */
+void sgx_evict_page(struct isgx_enclave_page *entry,
+		    struct isgx_enclave *encl,
+		    unsigned int flags)
+{
+	struct isgx_epc_page *epc = entry->epc_page;
+
+	isgx_free_epc_page(epc, encl, flags);
+
+	entry->flags &= ~ISGX_ENCLAVE_PAGE_RESERVED;
+	entry->epc_page = NULL;
+}
+
+/*
+ * Swap out the enclave pages on the list @src.
+ *
+ * The enclave is taken from the first entry, so all entries are treated as
+ * belonging to that one enclave. The work happens in three passes:
+ * EBLOCK every page (dropping pages that cannot be handled), ETRACK the
+ * enclave's SECS, then EWB every remaining page into its backing page.
+ * If afterwards the enclave has no child pages left and is initialized,
+ * the SECS page is written out as well.
+ *
+ * @src is empty when the function returns.
+ */
+static void sgx_write_pages(struct list_head *src)
+{
+ struct isgx_enclave *enclave;
+ struct isgx_enclave_page *entry;
+ struct isgx_enclave_page *tmp;
+ /*
+  * One backing page per list entry plus one extra slot for the SECS.
+  * NOTE(review): assumes @src never holds more than
+  * ISGX_NR_SWAP_CLUSTER_MAX entries - confirm against callers.
+  */
+ struct page *pages[ISGX_NR_SWAP_CLUSTER_MAX + 1];
+ struct isgx_vma *evma;
+ int cnt = 0;
+ int i = 0;
+ int ret;
+
+ if (list_empty(src))
+ return;
+
+ entry = list_first_entry(src, struct isgx_enclave_page, load_list);
+ enclave = entry->enclave;
+
+ /* Without a pinned mm nothing can be written back: just free the pages. */
+ if (!isgx_pin_mm(enclave)) {
+ while (!list_empty(src)) {
+ entry = list_first_entry(src, struct isgx_enclave_page,
+ load_list);
+ list_del(&entry->load_list);
+ mutex_lock(&enclave->lock);
+ sgx_evict_page(entry, enclave, 0);
+ mutex_unlock(&enclave->lock);
+ }
+
+ return;
+ }
+
+ /* EBLOCK */
+
+ list_for_each_entry_safe(entry, tmp, src, load_list) {
+ mutex_lock(&enclave->lock);
+ evma = isgx_find_vma(enclave, entry->addr);
+ /* No VMA covers the page any more: free it instead of swapping. */
+ if (!evma) {
+ list_del(&entry->load_list);
+ sgx_evict_page(entry, enclave, 0);
+ mutex_unlock(&enclave->lock);
+ continue;
+ }
+
+ pages[cnt] = isgx_get_backing(enclave, entry);
+ /* No backing page: return the entry to the enclave's load list. */
+ if (IS_ERR(pages[cnt])) {
+ list_del(&entry->load_list);
+ list_add_tail(&entry->load_list, &enclave->load_list);
+ entry->flags &= ~ISGX_ENCLAVE_PAGE_RESERVED;
+ mutex_unlock(&enclave->lock);
+ continue;
+ }
+
+ zap_vma_ptes(evma->vma, entry->addr, PAGE_SIZE);
+ do_eblock(entry->epc_page);
+ /* pages[0..cnt-1] line up with the entries left on @src, in order. */
+ cnt++;
+ mutex_unlock(&enclave->lock);
+ }
+
+ /* ETRACK */
+
+ mutex_lock(&enclave->lock);
+ do_etrack(enclave->secs_page.epc_page);
+ mutex_unlock(&enclave->lock);
+
+ /* EWB */
+
+ mutex_lock(&enclave->lock);
+ i = 0;
+
+ while (!list_empty(src)) {
+ entry = list_first_entry(src, struct isgx_enclave_page,
+ load_list);
+ list_del(&entry->load_list);
+
+ /* The VMA is looked up again; it may be gone since the EBLOCK pass. */
+ evma = isgx_find_vma(enclave, entry->addr);
+ if (evma) {
+ ret = do_ewb(enclave, entry, pages[i]);
+ BUG_ON(ret != 0 && ret != SGX_NOT_TRACKED);
+ /* Only kick out threads with an IPI if needed. */
+ if (ret) {
+ smp_call_function(isgx_ipi_cb, NULL, 1);
+ BUG_ON(do_ewb(enclave, entry, pages[i]));
+ }
+ enclave->secs_child_cnt--;
+ }
+
+ /* Skip EREMOVE only when the page was actually written back. */
+ sgx_evict_page(entry, enclave, evma ? ISGX_FREE_SKIP_EREMOVE : 0);
+ /* evma doubles as the 'write' flag: dirty the backing page only after an EWB. */
+ isgx_put_backing(pages[i++], evma);
+ }
+
+ /* Allow SECS page eviction only when the enclave is initialized. */
+ if (!enclave->secs_child_cnt &&
+ (enclave->flags & ISGX_ENCLAVE_INITIALIZED)) {
+ /* The slot after the last regular page is used for the SECS backing. */
+ pages[cnt] = isgx_get_backing(enclave, &enclave->secs_page);
+ if (!IS_ERR(pages[cnt])) {
+ BUG_ON(do_ewb(enclave, &enclave->secs_page,
+ pages[cnt]));
+ enclave->flags |= ISGX_ENCLAVE_SECS_EVICTED;
+
+ sgx_evict_page(&enclave->secs_page, NULL,
+ ISGX_FREE_SKIP_EREMOVE);
+ isgx_put_backing(pages[cnt], true);
+ }
+ }
+
+ mutex_unlock(&enclave->lock);
+ /* Every page that got a backing page in the EBLOCK pass must be consumed. */
+ BUG_ON(i != cnt);
+
+ isgx_unpin_mm(enclave);
+}
+
+/*
+ * One swapping round: pick a TGID context, pick one of its enclaves,
+ * isolate up to @nr_to_scan of that enclave's loaded pages and write them
+ * out. The references taken while picking are dropped again at the end.
+ */
+static void sgx_swap_pages(unsigned long nr_to_scan)
+{
+	struct isgx_tgid_ctx *ctx;
+	struct isgx_enclave *encl;
+	LIST_HEAD(cluster);
+
+	ctx = isolate_tgid_ctx(nr_to_scan);
+	if (!ctx)
+		return;
+
+	encl = isolate_enclave(ctx, nr_to_scan);
+	if (encl) {
+		sgx_isolate_pages(encl, &cluster, nr_to_scan);
+		sgx_write_pages(&cluster);
+
+		kref_put(&encl->refcount, isgx_enclave_release);
+	}
+
+	kref_put(&ctx->refcount, release_tgid_ctx);
+}
+
+/*
+ * Main loop of the swap thread.
+ *
+ * While the number of free EPC pages is below the high watermark, one
+ * cluster of pages is swapped out per iteration. Otherwise the thread
+ * sleeps on kisgxswapd_waitq until it is woken up. The loop ends when the
+ * thread is asked to stop.
+ *
+ * Always returns 0.
+ */
+int kisgxswapd(void *p)
+{
+	DEFINE_WAIT(wait);
+	unsigned int free_cnt;
+	unsigned int high_mark;
+
+	while (!kthread_should_stop()) {
+		spin_lock(&isgx_free_list_lock);
+		free_cnt = isgx_nr_free_epc_pages;
+		high_mark = isgx_nr_high_epc_pages;
+		spin_unlock(&isgx_free_list_lock);
+
+		if (free_cnt >= high_mark) {
+			/* Enough free pages: sleep until someone wakes us. */
+			prepare_to_wait(&kisgxswapd_waitq,
+					&wait, TASK_INTERRUPTIBLE);
+
+			if (!kthread_should_stop())
+				schedule();
+
+			finish_wait(&kisgxswapd_waitq, &wait);
+			continue;
+		}
+
+		sgx_swap_pages(ISGX_NR_SWAP_CLUSTER_MAX);
+		schedule();
+	}
+
+	pr_info("%s: done\n", __func__);
+	return 0;
+}
+
+/*
+ * Build the EPC page cache for the physical range [@start, @start + @size)
+ * and start the swap thread.
+ *
+ * One descriptor is allocated per PAGE_SIZE step and put on the free list;
+ * the high watermark is set to twice the low watermark.
+ *
+ * Returns 0 on success, -ENOMEM if a descriptor cannot be allocated, or
+ * the error from kthread_run() if the swap thread cannot be started. On
+ * failure the free list is emptied again, the page counters are reset and
+ * kisgxswapd_tsk is left untouched.
+ */
+int isgx_page_cache_init(resource_size_t start, unsigned long size)
+{
+	struct isgx_epc_page *epc_page;
+	struct isgx_epc_page *tmp;
+	struct task_struct *tsk;
+	unsigned long offset;
+	int ret = -ENOMEM;
+
+	for (offset = 0; offset < size; offset += PAGE_SIZE) {
+		epc_page = kzalloc(sizeof(*epc_page), GFP_KERNEL);
+		if (!epc_page)
+			goto err_freelist;
+		epc_page->pa = start + offset;
+
+		spin_lock(&isgx_free_list_lock);
+		list_add_tail(&epc_page->free_list, &isgx_free_list);
+		isgx_nr_total_epc_pages++;
+		isgx_nr_free_epc_pages++;
+		spin_unlock(&isgx_free_list_lock);
+	}
+
+	isgx_nr_high_epc_pages = 2 * isgx_nr_low_epc_pages;
+
+	/*
+	 * Only publish the task pointer once the thread really exists, so
+	 * that an error pointer can never reach kthread_stop().
+	 */
+	tsk = kthread_run(kisgxswapd, NULL, "kisgxswapd");
+	if (IS_ERR(tsk)) {
+		ret = PTR_ERR(tsk);
+		goto err_freelist;
+	}
+	kisgxswapd_tsk = tsk;
+
+	return 0;
+err_freelist:
+	spin_lock(&isgx_free_list_lock);
+	list_for_each_entry_safe(epc_page, tmp, &isgx_free_list, free_list) {
+		list_del(&epc_page->free_list);
+		kfree(epc_page);
+	}
+	/* Keep the accounting in step with the now empty free list. */
+	isgx_nr_total_epc_pages = 0;
+	isgx_nr_free_epc_pages = 0;
+	spin_unlock(&isgx_free_list_lock);
+	return ret;
+}
+
+/*
+ * Stop the swap thread (if there is one) and free every EPC page
+ * descriptor that is currently on the free list.
+ */
+void isgx_page_cache_teardown(void)
+{
+	struct isgx_epc_page *epc_page;
+	struct isgx_epc_page *next;
+
+	if (kisgxswapd_tsk)
+		kthread_stop(kisgxswapd_tsk);
+
+	spin_lock(&isgx_free_list_lock);
+	list_for_each_entry_safe(epc_page, next, &isgx_free_list, free_list) {
+		list_del(&epc_page->free_list);
+		kfree(epc_page);
+	}
+	spin_unlock(&isgx_free_list_lock);
+}
+
+/*
+ * Take the first descriptor off the free list, if there is one, and
+ * decrement the free page counter accordingly.
+ *
+ * Returns the descriptor, or NULL when the free list is empty.
+ */
+static struct isgx_epc_page *isgx_alloc_epc_page_fast(void)
+{
+	struct isgx_epc_page *page = NULL;
+
+	spin_lock(&isgx_free_list_lock);
+
+	if (list_empty(&isgx_free_list))
+		goto unlock;
+
+	page = list_first_entry(&isgx_free_list, struct isgx_epc_page,
+				free_list);
+	list_del(&page->free_list);
+	isgx_nr_free_epc_pages--;
+
+unlock:
+	spin_unlock(&isgx_free_list_lock);
+
+	return page;
+}
+
+/*
+ * Allocate an EPC page, swapping other pages out if necessary.
+ *
+ * When the free list is empty the function either gives up immediately
+ * (ISGX_ALLOC_ATOMIC set in @flags) or keeps swapping out a cluster of
+ * pages and rescheduling until a page becomes available or a signal is
+ * pending. On success the EPC counter of @tgid_epc_cnt is incremented,
+ * provided a context was passed. Before returning, the swap thread is
+ * woken up if the number of free pages is below the low watermark.
+ *
+ * Returns the page, ERR_PTR(-EBUSY) for a failed atomic allocation, or
+ * ERR_PTR(-ERESTARTSYS) when interrupted by a signal.
+ */
+struct isgx_epc_page *isgx_alloc_epc_page(
+	struct isgx_tgid_ctx *tgid_epc_cnt,
+	unsigned int flags)
+{
+	struct isgx_epc_page *page;
+
+	page = isgx_alloc_epc_page_fast();
+	while (!page) {
+		if (flags & ISGX_ALLOC_ATOMIC) {
+			page = ERR_PTR(-EBUSY);
+			goto done;
+		}
+
+		if (signal_pending(current)) {
+			page = ERR_PTR(-ERESTARTSYS);
+			goto done;
+		}
+
+		sgx_swap_pages(ISGX_NR_SWAP_CLUSTER_MAX);
+		schedule();
+
+		page = isgx_alloc_epc_page_fast();
+	}
+
+	if (tgid_epc_cnt)
+		atomic_inc(&tgid_epc_cnt->epc_cnt);
+
+done:
+	if (isgx_nr_free_epc_pages < isgx_nr_low_epc_pages)
+		wake_up(&kisgxswapd_waitq);
+
+	return page;
+}
+
+/*
+ * Return @entry to the free list.
+ *
+ * When @encl is given, the EPC counter of its TGID context is decremented.
+ * The page is EREMOVEd first unless ISGX_FREE_SKIP_EREMOVE is set in
+ * @flags or the enclave is flagged ISGX_ENCLAVE_SUSPEND; a failing EREMOVE
+ * is treated as a bug, as is a NULL @entry.
+ */
+void isgx_free_epc_page(struct isgx_epc_page *entry,
+			struct isgx_enclave *encl,
+			unsigned int flags)
+{
+	bool skip_eremove = (flags & ISGX_FREE_SKIP_EREMOVE) != 0;
+
+	BUG_ON(!entry);
+
+	if (encl) {
+		atomic_dec(&encl->tgid_ctx->epc_cnt);
+
+		if (encl->flags & ISGX_ENCLAVE_SUSPEND)
+			skip_eremove = true;
+	}
+
+	if (!skip_eremove)
+		BUG_ON(isgx_eremove(entry));
+
+	spin_lock(&isgx_free_list_lock);
+	list_add(&entry->free_list, &isgx_free_list);
+	isgx_nr_free_epc_pages++;
+	spin_unlock(&isgx_free_list_lock);
+}
diff --git a/drivers/staging/intel_sgx/isgx_user.h b/drivers/staging/intel_sgx/isgx_user.h
new file mode 100644
index 0000000..672d19c
--- /dev/null
+++ b/drivers/staging/intel_sgx/isgx_user.h
@@ -0,0 +1,113 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * Authors:
+ *
+ * Jarkko Sakkinen <jarkko.sakkinen@xxxxxxxxx>
+ * Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
+ * Serge Ayoun <serge.ayoun@xxxxxxxxx>
+ * Shay Katz-zamir <shay.katz-zamir@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#ifndef _UAPI_ASM_X86_SGX_H
+#define _UAPI_ASM_X86_SGX_H
+
+#include <linux/bitops.h>
+#include <linux/ioctl.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+/*
+ * ioctl commands of the SGX device. Only ENCLAVE_CREATE is bidirectional
+ * (_IOWR); the other commands only pass data into the kernel (_IOW).
+ */
+#define SGX_IOC_ENCLAVE_CREATE \
+ _IOWR('p', 0x02, struct sgx_create_param)
+#define SGX_IOC_ENCLAVE_ADD_PAGE \
+ _IOW('p', 0x03, struct sgx_add_param)
+#define SGX_IOC_ENCLAVE_INIT \
+ _IOW('p', 0x04, struct sgx_init_param)
+#define SGX_IOC_ENCLAVE_DESTROY \
+ _IOW('p', 0x06, struct sgx_destroy_param)
+
+/* SGX leaf instruction return values */
+#define SGX_SUCCESS 0
+#define SGX_INVALID_SIG_STRUCT 1
+#define SGX_INVALID_ATTRIBUTE 2
+#define SGX_BLKSTATE 3
+#define SGX_INVALID_MEASUREMENT 4
+#define SGX_NOTBLOCKABLE 5
+#define SGX_PG_INVLD 6
+#define SGX_LOCKFAIL 7
+#define SGX_INVALID_SIGNATURE 8
+#define SGX_MAC_COMPARE_FAIL 9
+#define SGX_PAGE_NOT_BLOCKED 10
+#define SGX_NOT_TRACKED 11
+#define SGX_VA_SLOT_OCCUPIED 12
+#define SGX_CHILD_PRESENT 13
+#define SGX_ENCLAVE_ACT 14
+#define SGX_ENTRYEPOCH_LOCKED 15
+#define SGX_INVALID_LICENSE 16
+#define SGX_PREV_TRK_INCMPL 17
+#define SGX_PG_IS_SECS 18
+#define SGX_INVALID_CPUSVN 32
+#define SGX_INVALID_ISVSVN 64
+#define SGX_UNMASKED_EVENT 128
+#define SGX_INVALID_KEYNAME 256
+
+/* IOCTL return values */
+#define SGX_POWER_LOST_ENCLAVE 0xc0000002
+#define SGX_LE_ROLLBACK 0xc0000003
+
+/* SECINFO flags */
+enum isgx_secinfo_flags {
+ SGX_SECINFO_FL_R = BIT_ULL(0),
+ SGX_SECINFO_FL_W = BIT_ULL(1),
+ SGX_SECINFO_FL_X = BIT_ULL(2),
+};
+
+/* SECINFO page types */
+enum isgx_secinfo_pt {
+ SGX_SECINFO_PT_SECS = 0x000ULL,
+ SGX_SECINFO_PT_TCS = 0x100ULL,
+ SGX_SECINFO_PT_REG = 0x200ULL,
+};
+
+/* SECINFO structure: a 64-bit flags word plus reserved space, 128-byte aligned. */
+struct sgx_secinfo {
+ __u64 flags;
+ __u64 reserved[7];
+} __aligned(128);
+
+/*
+ * EINIT token as consumed by the driver, 512-byte aligned. The driver
+ * itself only looks at 'valid' and 'isvsvnle'.
+ */
+struct sgx_einittoken {
+ __u32 valid;
+ __u8 reserved1[206];
+ __u16 isvsvnle;
+ __u8 reserved2[92];
+} __aligned(512);
+
+/*
+ * Parameter of SGX_IOC_ENCLAVE_CREATE.
+ * NOTE(review): presumably 'secs' is the user pointer to the SECS contents
+ * and 'addr' the enclave address handed back - confirm in the create handler.
+ */
+struct sgx_create_param {
+ void *secs;
+ unsigned long addr;
+};
+
+/* sgx_add_param.flags: do not run EEXTEND on the page after adding it. */
+#define SGX_ADD_SKIP_EEXTEND 0x1
+
+/*
+ * Parameter of SGX_IOC_ENCLAVE_ADD_PAGE. 'addr' is the page-aligned address
+ * inside the enclave and 'secinfo' a user pointer that the driver reads as
+ * a struct sgx_secinfo.
+ * NOTE(review): 'secinfo' is declared as struct isgx_secinfo, which is not
+ * defined in this header (the struct above is named sgx_secinfo) - looks
+ * like a naming slip; confirm before changing the ABI header.
+ */
+struct sgx_add_param {
+ unsigned long addr;
+ unsigned long user_addr;
+ struct isgx_secinfo *secinfo;
+ unsigned int flags;
+};
+
+/*
+ * Parameter of SGX_IOC_ENCLAVE_INIT. 'addr' identifies the enclave;
+ * 'sigstruct' and 'einittoken' are user pointers to the SIGSTRUCT and the
+ * EINIT token.
+ * NOTE(review): 'einittoken' is declared as struct isgx_einittoken, which
+ * is not defined in this header (the struct above is named sgx_einittoken).
+ */
+struct sgx_init_param {
+ unsigned long addr;
+ void *sigstruct;
+ struct isgx_einittoken *einittoken;
+};
+
+/* Parameter of SGX_IOC_ENCLAVE_DESTROY: 'addr' identifies the enclave. */
+struct sgx_destroy_param {
+ unsigned long addr;
+};
+
+#endif /* _UAPI_ASM_X86_SGX_H */
diff --git a/drivers/staging/intel_sgx/isgx_util.c b/drivers/staging/intel_sgx/isgx_util.c
new file mode 100644
index 0000000..c635014
--- /dev/null
+++ b/drivers/staging/intel_sgx/isgx_util.c
@@ -0,0 +1,334 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * Authors:
+ *
+ * Jarkko Sakkinen <jarkko.sakkinen@xxxxxxxxx>
+ * Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
+ * Serge Ayoun <serge.ayoun@xxxxxxxxx>
+ * Shay Katz-zamir <shay.katz-zamir@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include "isgx.h"
+#include <linux/highmem.h>
+#include <linux/shmem_fs.h>
+
+/**
+ * isgx_get_epc_page() - get a kernel virtual address for an EPC page
+ * @entry:	the EPC page to access
+ *
+ * On 32-bit kernels the page frame is mapped with kmap_atomic_pfn().
+ * Otherwise the address is isgx_epc_mem plus the offset of the page from
+ * isgx_epc_base. Every call must be paired with isgx_put_epc_page().
+ */
+void *isgx_get_epc_page(struct isgx_epc_page *entry)
+{
+#ifndef CONFIG_X86_32
+	return isgx_epc_mem + (entry->pa - isgx_epc_base);
+#else
+	return kmap_atomic_pfn(PFN_DOWN(entry->pa));
+#endif
+}
+
+/**
+ * isgx_put_epc_page() - release an address from isgx_get_epc_page()
+ * @epc_page_vaddr:	the address returned by isgx_get_epc_page()
+ *
+ * Undoes the atomic mapping on 32-bit kernels; does nothing otherwise.
+ */
+void isgx_put_epc_page(void *epc_page_vaddr)
+{
+#ifdef CONFIG_X86_32
+	kunmap_atomic(epc_page_vaddr);
+#endif
+}
+
+/**
+ * isgx_get_backing() - get the backing storage page of an enclave page
+ * @enclave:	the enclave owning the page
+ * @entry:	the enclave page
+ *
+ * Looks the page up in the enclave's backing file at the page index
+ * derived from the offset of @entry from the enclave base.
+ *
+ * Return: whatever shmem_read_mapping_page_gfp() returns, so callers check
+ * it with IS_ERR(). Release the page with isgx_put_backing().
+ */
+struct page *isgx_get_backing(struct isgx_enclave *enclave,
+			      struct isgx_enclave_page *entry)
+{
+	/* file_inode() is the helper for getting at the inode of a file */
+	struct address_space *mapping = file_inode(enclave->backing)->i_mapping;
+	pgoff_t index = (entry->addr - enclave->base) >> PAGE_SHIFT;
+
+	return shmem_read_mapping_page_gfp(mapping, index,
+					   mapping_gfp_mask(mapping));
+}
+
+/**
+ * isgx_put_backing() - release a page from isgx_get_backing()
+ * @backing_page:	the backing page
+ * @write:		true if the page was written to; it is then marked
+ *			dirty before the reference is dropped
+ */
+void isgx_put_backing(struct page *backing_page, bool write)
+{
+	if (write)
+		set_page_dirty(backing_page);
+
+	/*
+	 * page_cache_release() was only an alias of put_page() and has been
+	 * removed from newer kernels, so drop the reference directly.
+	 */
+	put_page(backing_page);
+}
+
+/**
+ * isgx_eremove() - execute EREMOVE on an EPC page
+ * @epc_page:	the EPC page
+ *
+ * Return: the value returned by __eremove(); a non-zero value is also
+ * reported through a rate limited debug message.
+ */
+int isgx_eremove(struct isgx_epc_page *epc_page)
+{
+	void *vaddr = isgx_get_epc_page(epc_page);
+	int rc = __eremove(vaddr);
+
+	isgx_put_epc_page(vaddr);
+
+	if (rc != 0)
+		pr_debug_ratelimited("EREMOVE returned %d\n", rc);
+
+	return rc;
+}
+
+/*
+ * apply_to_page_range() callback: if the PTE is young, write it back as old
+ * and return the non-zero pte_young() value; return 0 for an old PTE.
+ * @data carries the mm_struct the PTE belongs to.
+ */
+static int isgx_test_and_clear_young_cb(pte_t *ptep, pgtable_t token,
+					unsigned long addr, void *data)
+{
+	struct mm_struct *mm = data;
+	int young = pte_young(*ptep);
+
+	if (!young)
+		return 0;
+
+	set_pte_at(mm, addr, ptep, pte_mkold(*ptep));
+	return young;
+}
+
+/**
+ * isgx_test_and_clear_young() - is the enclave page recently accessed?
+ * @page:	enclave page to be tested for recent access
+ *
+ * Tests the Access (A) bit of the PTE mapping the enclave page and clears
+ * it when set.
+ *
+ * Return: 0 if no VMA of the enclave covers the page or the page was not
+ * recently accessed; otherwise the value apply_to_page_range() returns,
+ * which is non-zero for a young PTE.
+ */
+int isgx_test_and_clear_young(struct isgx_enclave_page *page)
+{
+	struct isgx_vma *evma;
+	struct mm_struct *mm;
+
+	evma = isgx_find_vma(page->enclave, page->addr);
+	if (!evma)
+		return 0;
+
+	mm = evma->vma->vm_mm;
+	return apply_to_page_range(mm, page->addr, PAGE_SIZE,
+				   isgx_test_and_clear_young_cb, mm);
+}
+
+/**
+ * isgx_find_vma() - find VMA for the enclave address
+ * @enclave:	the enclave to be searched
+ * @addr:	the linear address to query
+ *
+ * Walks the VMA list of the enclave looking for an entry whose range
+ * [vm_start, vm_end) contains @addr.
+ *
+ * Return: the matching entry, or NULL (after a debug message) if there is
+ * none.
+ */
+struct isgx_vma *isgx_find_vma(struct isgx_enclave *enclave,
+			       unsigned long addr)
+{
+	struct isgx_vma *evma;
+
+	/* Nothing is removed during the walk, so the plain iterator is enough */
+	list_for_each_entry(evma, &enclave->vma_list, vma_list) {
+		if (evma->vma->vm_start <= addr && evma->vma->vm_end > addr)
+			return evma;
+	}
+
+	isgx_dbg(enclave, "cannot find VMA at 0x%lx\n", addr);
+	return NULL;
+}
+
+/**
+ * isgx_zap_tcs_ptes() - clear PTEs that contain TCS pages
+ * @enclave:	an enclave
+ * @vma:	a VMA of the enclave
+ *
+ * Walks all pages of the enclave and zaps the PTE of each one that has an
+ * EPC page, is flagged ISGX_ENCLAVE_PAGE_TCS and lies inside @vma.
+ */
+void isgx_zap_tcs_ptes(struct isgx_enclave *enclave, struct vm_area_struct *vma)
+{
+	struct isgx_enclave_page *entry;
+	struct rb_node *rb;
+
+	for (rb = rb_first(&enclave->enclave_rb); rb; rb = rb_next(rb)) {
+		entry = container_of(rb, struct isgx_enclave_page, node);
+
+		if (!entry->epc_page)
+			continue;
+		if (!(entry->flags & ISGX_ENCLAVE_PAGE_TCS))
+			continue;
+		if (entry->addr < vma->vm_start || entry->addr >= vma->vm_end)
+			continue;
+
+		zap_vma_ptes(vma, entry->addr, PAGE_SIZE);
+	}
+}
+
+/**
+ * isgx_pin_mm - pin the mm_struct of an enclave
+ *
+ * @encl:	an enclave
+ *
+ * Takes a reference to the mm_struct of the enclave and read-locks its
+ * mmap_sem, provided that the enclave is not suspended and still has VMAs.
+ * The VMA list is checked again once mmap_sem is held.
+ *
+ * Return: true if the mm is pinned, in which case the caller must call
+ * isgx_unpin_mm(); false otherwise, with nothing left held.
+ */
+bool isgx_pin_mm(struct isgx_enclave *encl)
+{
+	bool has_vmas;
+
+	if (encl->flags & ISGX_ENCLAVE_SUSPEND)
+		return false;
+
+	mutex_lock(&encl->lock);
+	has_vmas = !list_empty(&encl->vma_list);
+	if (has_vmas)
+		atomic_inc(&encl->mm->mm_count);
+	mutex_unlock(&encl->lock);
+
+	if (!has_vmas)
+		return false;
+
+	down_read(&encl->mm->mmap_sem);
+
+	if (!list_empty(&encl->vma_list))
+		return true;
+
+	isgx_unpin_mm(encl);
+	return false;
+}
+
+/**
+ * isgx_unpin_mm - unpin the mm_struct of an enclave
+ *
+ * @encl:	an enclave
+ *
+ * Releases the read lock on mmap_sem and then drops the mm_struct
+ * reference, i.e. undoes a successful isgx_pin_mm().
+ */
+void isgx_unpin_mm(struct isgx_enclave *encl)
+{
+	struct mm_struct *mm = encl->mm;
+
+	up_read(&mm->mmap_sem);
+	mmdrop(mm);
+}
+
+/**
+ * isgx_invalidate - invalidate the enclave
+ *
+ * @encl:	an enclave
+ *
+ * Unmaps the TCS pages from every tracked VMA and then empties the VMA
+ * list, freeing its entries.
+ */
+void isgx_invalidate(struct isgx_enclave *encl)
+{
+	struct isgx_vma *evma;
+	struct isgx_vma *next;
+
+	list_for_each_entry(evma, &encl->vma_list, vma_list)
+		isgx_zap_tcs_ptes(encl, evma->vma);
+
+	list_for_each_entry_safe(evma, next, &encl->vma_list, vma_list) {
+		list_del(&evma->vma_list);
+		kfree(evma);
+	}
+}
+
+/**
+ * isgx_find_enclave() - find enclave given a virtual address
+ * @mm:		the address space where we query the enclave
+ * @addr:	the virtual address to query
+ * @vma:	always set to the result of find_vma() for @addr; it is a
+ *		verified enclave VMA unless -EINVAL is returned
+ *
+ * Finds an enclave given a virtual address and an address space where to
+ * seek it from.
+ *
+ * Return: 0 on success, -EINVAL if @addr is not inside a VMA that uses
+ * isgx_vm_ops, -ENOENT if the VMA has no enclave attached, or
+ * SGX_POWER_LOST_ENCLAVE if the enclave has ISGX_ENCLAVE_SUSPEND set.
+ * NOTE(review): SGX_POWER_LOST_ENCLAVE is 0xc0000002, which is negative
+ * once converted to the int return type, so callers must compare against
+ * the constant instead of testing the sign.
+ */
+int isgx_find_enclave(struct mm_struct *mm, unsigned long addr,
+		      struct vm_area_struct **vma)
+{
+	struct isgx_enclave *enclave;
+
+	*vma = find_vma(mm, addr);
+
+	/* find_vma() may return a VMA that starts above addr */
+	if (!(*vma) || (*vma)->vm_ops != &isgx_vm_ops ||
+	    addr < (*vma)->vm_start)
+		return -EINVAL;
+
+	/* Is ECREATE already done? */
+	enclave = (*vma)->vm_private_data;
+	if (!enclave)
+		return -ENOENT;
+
+	if (enclave->flags & ISGX_ENCLAVE_SUSPEND) {
+		/* Terminate the line like every other log message does */
+		isgx_info(enclave, "suspend ID has been changed\n");
+		return SGX_POWER_LOST_ENCLAVE;
+	}
+
+	return 0;
+}
+
+/**
+ * isgx_enclave_find_page() - find an enclave page
+ * @enclave:	the enclave to query
+ * @enclave_la:	the virtual address to query
+ *
+ * Searches the red-black tree of enclave pages, which is keyed by page
+ * address.
+ *
+ * Return: the page whose address equals @enclave_la, or NULL if there is
+ * none.
+ */
+struct isgx_enclave_page *isgx_enclave_find_page(struct isgx_enclave *enclave,
+						 unsigned long enclave_la)
+{
+	struct rb_node *cur = enclave->enclave_rb.rb_node;
+	struct isgx_enclave_page *page;
+
+	while (cur) {
+		page = container_of(cur, struct isgx_enclave_page, node);
+
+		if (page->addr == enclave_la)
+			return page;
+
+		if (enclave_la < page->addr)
+			cur = cur->rb_left;
+		else
+			cur = cur->rb_right;
+	}
+
+	return NULL;
+}
+
+/**
+ * isgx_enclave_release() - kref release callback of an enclave
+ * @ref:	the refcount member embedded in the enclave
+ *
+ * Unlinks the enclave from the enclave list, frees every enclave page and
+ * VA page together with their EPC pages, frees the SECS EPC page, drops
+ * the references to the tgid context and the backing file and finally
+ * frees the enclave itself.
+ */
+void isgx_enclave_release(struct kref *ref)
+{
+	struct isgx_enclave *enclave =
+		container_of(ref, struct isgx_enclave, refcount);
+	struct isgx_enclave_page *entry;
+	struct isgx_va_page *va_page;
+	struct isgx_va_page *va_next;
+	struct rb_node *cur;
+	struct rb_node *next;
+
+	mutex_lock(&isgx_tgid_ctx_mutex);
+	if (!list_empty(&enclave->enclave_list))
+		list_del(&enclave->enclave_list);
+	mutex_unlock(&isgx_tgid_ctx_mutex);
+
+	/* Fetch the successor before the node is erased from the tree */
+	for (cur = rb_first(&enclave->enclave_rb); cur; cur = next) {
+		next = rb_next(cur);
+		entry = container_of(cur, struct isgx_enclave_page, node);
+
+		rb_erase(cur, &enclave->enclave_rb);
+		if (entry->epc_page) {
+			list_del(&entry->load_list);
+			isgx_free_epc_page(entry->epc_page, enclave, 0);
+		}
+		kfree(entry);
+	}
+
+	list_for_each_entry_safe(va_page, va_next, &enclave->va_pages, list) {
+		list_del(&va_page->list);
+		isgx_free_epc_page(va_page->epc_page, NULL, 0);
+		kfree(va_page);
+	}
+
+	if (enclave->secs_page.epc_page)
+		isgx_free_epc_page(enclave->secs_page.epc_page, NULL, 0);
+	enclave->secs_page.epc_page = NULL;
+
+	if (enclave->tgid_ctx)
+		kref_put(&enclave->tgid_ctx->refcount, release_tgid_ctx);
+
+	if (enclave->backing)
+		fput(enclave->backing);
+
+	kfree(enclave);
+}
diff --git a/drivers/staging/intel_sgx/isgx_vma.c b/drivers/staging/intel_sgx/isgx_vma.c
new file mode 100644
index 0000000..f6cfb02
--- /dev/null
+++ b/drivers/staging/intel_sgx/isgx_vma.c
@@ -0,0 +1,282 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * Authors:
+ *
+ * Jarkko Sakkinen <jarkko.sakkinen@xxxxxxxxx>
+ * Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
+ * Serge Ayoun <serge.ayoun@xxxxxxxxx>
+ * Shay Katz-zamir <shay.katz-zamir@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include "isgx.h"
+#include <asm/mman.h>
+#include <linux/delay.h>
+#include <linux/file.h>
+#include <linux/highmem.h>
+#include <linux/ratelimit.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/hashtable.h>
+#include <linux/shmem_fs.h>
+
+/*
+ * vm_operations_struct::open handler.
+ *
+ * Starts tracking the new VMA in the enclave's VMA list and takes an
+ * enclave reference for it. If the VMA belongs to another mm_struct than
+ * the tracked VMAs (the process was forked), or vm_private_data has already
+ * been cleared, the VMA is detached from the enclave instead: its PTEs are
+ * zapped and vm_private_data is set to NULL.
+ */
+static void isgx_vma_open(struct vm_area_struct *vma)
+{
+	struct isgx_enclave *enclave = vma->vm_private_data;
+	struct isgx_vma *evma;
+
+	/* Was vm_private_data nullified as a result of the previous fork? */
+	if (!enclave)
+		goto out_fork;
+
+	mutex_lock(&enclave->lock);
+
+	/*
+	 * isgx_invalidate() leaves the VMA list empty. list_first_entry() must
+	 * not be used on an empty list, so the fork check and the tracking are
+	 * only done while the list has entries. Both are done under a single
+	 * lock section so that the list cannot change in between.
+	 */
+	if (!list_empty(&enclave->vma_list)) {
+		/* Was the process forked? mm_struct changes when the process
+		 * is forked.
+		 */
+		evma = list_first_entry(&enclave->vma_list,
+					struct isgx_vma, vma_list);
+		if (evma->vma->vm_mm != vma->vm_mm) {
+			mutex_unlock(&enclave->lock);
+			goto out_fork;
+		}
+
+		evma = kzalloc(sizeof(*evma), GFP_KERNEL);
+		if (!evma) {
+			/* The VMA cannot be tracked: give up on the enclave */
+			isgx_invalidate(enclave);
+		} else {
+			evma->vma = vma;
+			list_add_tail(&evma->vma_list, &enclave->vma_list);
+		}
+	}
+
+	mutex_unlock(&enclave->lock);
+
+	/* Dropped by isgx_vma_close() */
+	kref_get(&enclave->refcount);
+	return;
+out_fork:
+	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+	vma->vm_private_data = NULL;
+}
+
+/*
+ * vm_operations_struct::close handler.
+ *
+ * Stops tracking the VMA, zaps its PTEs, detaches it from the enclave and
+ * drops the enclave reference that was held for it.
+ */
+static void isgx_vma_close(struct vm_area_struct *vma)
+{
+	struct isgx_enclave *enclave = vma->vm_private_data;
+	unsigned long size = vma->vm_end - vma->vm_start;
+	struct isgx_vma *tracked;
+
+	/* A VMA inherited through fork has vm_private_data set to NULL. */
+	if (!enclave)
+		return;
+
+	mutex_lock(&enclave->lock);
+
+	/* The entry may be missing: when isgx_vma_open() fails to allocate
+	 * memory, the whole VMA list is emptied.
+	 */
+	tracked = isgx_find_vma(enclave, vma->vm_start);
+	if (tracked) {
+		list_del(&tracked->vma_list);
+		kfree(tracked);
+	}
+
+	vma->vm_private_data = NULL;
+
+	isgx_zap_tcs_ptes(enclave, vma);
+	zap_vma_ptes(vma, vma->vm_start, size);
+
+	mutex_unlock(&enclave->lock);
+
+	kref_put(&enclave->refcount, isgx_enclave_release);
+}
+
+/*
+ * Load an evicted page from its backing page into an EPC page with ELDU.
+ *
+ * @enclave:		the enclave owning the page
+ * @enclave_page:	the page to load; supplies the address, the PCMD and
+ *			the VA page slot
+ * @epc_page:		the destination EPC page
+ * @backing:		the backing page holding the evicted contents
+ * @is_secs:		true when the SECS page itself is loaded; no parent
+ *			SECS is mapped then and the linear address is zero
+ *
+ * The mappings are released in the reverse order of their creation.
+ *
+ * Returns 0 on success, or -EFAULT (after a WARN) if __eldu() returns
+ * non-zero.
+ */
+static int do_eldu(struct isgx_enclave *enclave,
+		   struct isgx_enclave_page *enclave_page,
+		   struct isgx_epc_page *epc_page,
+		   struct page *backing,
+		   bool is_secs)
+{
+	struct sgx_page_info pginfo;
+	void *secs_ptr = NULL;
+	void *epc_ptr;
+	void *va_ptr;
+	int ret;
+
+	pginfo.srcpge = (unsigned long)kmap_atomic(backing);
+	if (!is_secs)
+		secs_ptr = isgx_get_epc_page(enclave->secs_page.epc_page);
+	pginfo.secs = (unsigned long)secs_ptr;
+
+	epc_ptr = isgx_get_epc_page(epc_page);
+	va_ptr = isgx_get_epc_page(enclave_page->va_page->epc_page);
+
+	if (is_secs)
+		pginfo.linaddr = 0;
+	else
+		pginfo.linaddr = enclave_page->addr;
+	pginfo.pcmd = (unsigned long)&enclave_page->pcmd;
+
+	ret = __eldu((unsigned long)&pginfo, (unsigned long)epc_ptr,
+		     (unsigned long)va_ptr + enclave_page->va_offset);
+
+	isgx_put_epc_page(va_ptr);
+	isgx_put_epc_page(epc_ptr);
+	if (!is_secs)
+		isgx_put_epc_page(secs_ptr);
+	kunmap_atomic((void *)(unsigned long)pginfo.srcpge);
+
+	if (WARN_ON(ret))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Fault an evicted enclave page back into the EPC.
+ *
+ * @vma:	the enclave VMA the fault happened in
+ * @addr:	the faulting address
+ * @reserve:	if non-zero, fail with -EBUSY when the page is already
+ *		reserved and otherwise flag the page as reserved
+ *
+ * The EPC pages for the faulted page and for a possibly evicted SECS are
+ * allocated up front, before the enclave lock is taken. If the SECS has
+ * been evicted, it is reloaded first. The enclave page is then reloaded
+ * from its backing page and its PFN is inserted into @vma.
+ *
+ * Returns the enclave page on success, including the case where the page
+ * had already been faulted in. Returns an ERR_PTR() on every failure, so
+ * that a failed ELDU or a failed PFN insertion is never reported as a
+ * success to the caller.
+ */
+static struct isgx_enclave_page *isgx_vma_do_fault(struct vm_area_struct *vma,
+						   unsigned long addr,
+						   int reserve)
+{
+	struct isgx_enclave *enclave = vma->vm_private_data;
+	struct isgx_enclave_page *entry;
+	struct isgx_epc_page *epc_page;
+	struct isgx_epc_page *secs_epc_page = NULL;
+	struct page *backing;
+	unsigned int free_flags = ISGX_FREE_SKIP_EREMOVE;
+	int rc;
+
+	/* If process was forked, VMA is still there but vm_private_data is set
+	 * to NULL.
+	 */
+	if (!enclave)
+		return ERR_PTR(-EFAULT);
+
+	entry = isgx_enclave_find_page(enclave, addr);
+	if (!entry)
+		return ERR_PTR(-EFAULT);
+
+	/* We use atomic allocation in the #PF handler in order to avoid ABBA
+	 * deadlock with mmap_sems.
+	 */
+	epc_page = isgx_alloc_epc_page(enclave->tgid_ctx, ISGX_ALLOC_ATOMIC);
+	if (IS_ERR(epc_page))
+		return (struct isgx_enclave_page *)epc_page;
+
+	/* The SECS page is not currently accounted. */
+	secs_epc_page = isgx_alloc_epc_page(NULL, ISGX_ALLOC_ATOMIC);
+	if (IS_ERR(secs_epc_page)) {
+		isgx_free_epc_page(epc_page, enclave, ISGX_FREE_SKIP_EREMOVE);
+		return (struct isgx_enclave_page *)secs_epc_page;
+	}
+
+	mutex_lock(&enclave->lock);
+
+	if (list_empty(&enclave->vma_list)) {
+		entry = ERR_PTR(-EFAULT);
+		goto out;
+	}
+
+	if (!(enclave->flags & ISGX_ENCLAVE_INITIALIZED)) {
+		isgx_dbg(enclave, "cannot fault, unitialized\n");
+		entry = ERR_PTR(-EFAULT);
+		goto out;
+	}
+
+	if (reserve && (entry->flags & ISGX_ENCLAVE_PAGE_RESERVED)) {
+		isgx_dbg(enclave, "cannot fault, 0x%lx is reserved\n",
+			 entry->addr);
+		entry = ERR_PTR(-EBUSY);
+		goto out;
+	}
+
+	/* Legal race condition, page is already faulted. */
+	if (entry->epc_page) {
+		if (reserve)
+			entry->flags |= ISGX_ENCLAVE_PAGE_RESERVED;
+		goto out;
+	}
+
+	/* If SECS is evicted then reload it first */
+	if (enclave->flags & ISGX_ENCLAVE_SECS_EVICTED) {
+		backing = isgx_get_backing(enclave, &enclave->secs_page);
+		if (IS_ERR(backing)) {
+			entry = (void *)backing;
+			goto out;
+		}
+
+		rc = do_eldu(enclave, &enclave->secs_page, secs_epc_page,
+			     backing, true /* is_secs */);
+		isgx_put_backing(backing, 0);
+		if (rc) {
+			/* Do not report the fault as handled */
+			entry = ERR_PTR(rc);
+			goto out;
+		}
+
+		enclave->secs_page.epc_page = secs_epc_page;
+		enclave->flags &= ~ISGX_ENCLAVE_SECS_EVICTED;
+
+		/* Do not free */
+		secs_epc_page = NULL;
+	}
+
+	backing = isgx_get_backing(enclave, entry);
+	if (IS_ERR(backing)) {
+		entry = (void *)backing;
+		goto out;
+	}
+
+	rc = do_eldu(enclave, entry, epc_page, backing, false /* is_secs */);
+	isgx_put_backing(backing, 0);
+	if (rc) {
+		/* Nothing was loaded: the EPC page is freed without EREMOVE
+		 * and its PFN must not be mapped.
+		 */
+		entry = ERR_PTR(rc);
+		goto out;
+	}
+
+	rc = vm_insert_pfn(vma, entry->addr, PFN_DOWN(epc_page->pa));
+	if (rc) {
+		/* The EPC page has been loaded by ELDU, so it has to be
+		 * removed with EREMOVE when it is freed.
+		 */
+		free_flags = 0;
+		entry = ERR_PTR(rc);
+		goto out;
+	}
+
+	enclave->secs_child_cnt++;
+
+	entry->epc_page = epc_page;
+
+	if (reserve)
+		entry->flags |= ISGX_ENCLAVE_PAGE_RESERVED;
+
+	/* Do not free */
+	epc_page = NULL;
+
+	list_add_tail(&entry->load_list, &enclave->load_list);
+out:
+	mutex_unlock(&enclave->lock);
+	if (epc_page)
+		isgx_free_epc_page(epc_page, enclave, free_flags);
+	if (secs_epc_page)
+		isgx_free_epc_page(secs_epc_page, NULL,
+				   ISGX_FREE_SKIP_EREMOVE);
+	return entry;
+}
+
+/*
+ * vm_operations_struct::fault handler.
+ *
+ * Faults the page in without reserving it. Returns VM_FAULT_NOPAGE when
+ * isgx_vma_do_fault() succeeds or fails with -EBUSY, and VM_FAULT_SIGBUS
+ * for every other error.
+ */
+static int isgx_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct isgx_enclave_page *entry;
+
+	entry = isgx_vma_do_fault(vma, (unsigned long)vmf->virtual_address, 0);
+
+	if (IS_ERR(entry) && PTR_ERR(entry) != -EBUSY)
+		return VM_FAULT_SIGBUS;
+
+	return VM_FAULT_NOPAGE;
+}
+
+/* VMA operations of enclave mappings; isgx_find_enclave() uses the address
+ * of this structure to recognize enclave VMAs.
+ */
+struct vm_operations_struct isgx_vm_ops = {
+	.open = isgx_vma_open,
+	.close = isgx_vma_close,
+	.fault = isgx_vma_fault,
+};
--
2.7.4