[RFC PATCH 09/12] scs: kernel shadow stack with hardware assistance

From: Deepak Gupta
Date: Tue Apr 09 2024 - 02:13:56 EST


If the shadow stack has memory protections provided by the underlying CPU,
use those protections. RISC-V uses PAGE_KERNEL_SHADOWSTACK to vmalloc such
shadow stack pages. The hardware shadow stack on RISC-V grows downwards,
like a regular stack, whereas the Clang-based software shadow call stack
grows from low to high addresses. This patch therefore handles the
differences that arise from the opposite growth direction. Furthermore, a
RISC-V hardware shadow stack can't be memset, because memset uses normal
stores. Lastly, to store the magic word at the base of the shadow stack, an
arch-specific shadow stack store has to be performed.

Signed-off-by: Deepak Gupta <debug@xxxxxxxxxxxx>
---
include/linux/scs.h | 48 +++++++++++++++++++++++++++++++++------------
kernel/scs.c | 28 ++++++++++++++++++++++----
2 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/include/linux/scs.h b/include/linux/scs.h
index 4ab5bdc898cf..3a31433532d1 100644
--- a/include/linux/scs.h
+++ b/include/linux/scs.h
@@ -12,6 +12,7 @@
#include <linux/poison.h>
#include <linux/sched.h>
#include <linux/sizes.h>
+#include <asm/scs.h>

#ifdef CONFIG_SHADOW_CALL_STACK

@@ -31,6 +32,29 @@ void scs_init(void);
int scs_prepare(struct task_struct *tsk, int node);
void scs_release(struct task_struct *tsk);

+#ifdef CONFIG_DYNAMIC_SCS
+/* dynamic_scs_enabled defaults to true on RISC-V, false elsewhere */
+#ifdef CONFIG_RISCV
+DECLARE_STATIC_KEY_TRUE(dynamic_scs_enabled);
+#else
+DECLARE_STATIC_KEY_FALSE(dynamic_scs_enabled);
+#endif
+#endif
+
+static inline bool scs_is_dynamic(void)
+{
+ if (!IS_ENABLED(CONFIG_DYNAMIC_SCS))
+ return false;
+ return static_branch_likely(&dynamic_scs_enabled);
+}
+
+static inline bool scs_is_enabled(void)
+{
+ if (!IS_ENABLED(CONFIG_DYNAMIC_SCS))
+ return true;
+ return scs_is_dynamic();
+}
+
static inline void scs_task_reset(struct task_struct *tsk)
{
/*
@@ -42,6 +66,9 @@ static inline void scs_task_reset(struct task_struct *tsk)

static inline unsigned long *__scs_magic(void *s)
{
+ if (scs_is_dynamic())
+ return (unsigned long *)(s);
+
return (unsigned long *)(s + SCS_SIZE) - 1;
}

@@ -50,23 +77,18 @@ static inline bool task_scs_end_corrupted(struct task_struct *tsk)
unsigned long *magic = __scs_magic(task_scs(tsk));
unsigned long sz = task_scs_sp(tsk) - task_scs(tsk);

- return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
-}
-
-DECLARE_STATIC_KEY_FALSE(dynamic_scs_enabled);
+ if (scs_is_dynamic())
+ sz = (task_scs(tsk) + SCS_SIZE) - task_scs_sp(tsk);

-static inline bool scs_is_dynamic(void)
-{
- if (!IS_ENABLED(CONFIG_DYNAMIC_SCS))
- return false;
- return static_branch_likely(&dynamic_scs_enabled);
+ return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
}

-static inline bool scs_is_enabled(void)
+static inline void __scs_store_magic(unsigned long *s, unsigned long magic_val)
{
- if (!IS_ENABLED(CONFIG_DYNAMIC_SCS))
- return true;
- return scs_is_dynamic();
+ if (scs_is_dynamic())
+ arch_scs_store(s, magic_val);
+ else
+ *s = magic_val; /* s is already the magic slot; don't re-apply __scs_magic() */
}

#else /* CONFIG_SHADOW_CALL_STACK */
diff --git a/kernel/scs.c b/kernel/scs.c
index d7809affe740..e447483fa9f4 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -13,8 +13,13 @@
#include <linux/vmstat.h>

#ifdef CONFIG_DYNAMIC_SCS
+/* dynamic_scs_enabled defaults to true on RISC-V, false elsewhere */
+#ifdef CONFIG_RISCV
+DEFINE_STATIC_KEY_TRUE(dynamic_scs_enabled);
+#else
DEFINE_STATIC_KEY_FALSE(dynamic_scs_enabled);
#endif
+#endif

static void __scs_account(void *s, int account)
{
@@ -32,19 +37,29 @@ static void *__scs_alloc(int node)
{
int i;
void *s;
+ pgprot_t prot = PAGE_KERNEL;
+
+ if (scs_is_dynamic())
+ prot = PAGE_KERNEL_SHADOWSTACK;

for (i = 0; i < NR_CACHED_SCS; i++) {
s = this_cpu_xchg(scs_cache[i], NULL);
if (s) {
s = kasan_unpoison_vmalloc(s, SCS_SIZE,
KASAN_VMALLOC_PROT_NORMAL);
- memset(s, 0, SCS_SIZE);
+/*
+ * When the shadow stack is not dynamic (scs_is_dynamic() is false), it is
+ * safe to memset. Otherwise memset is not possible: memset performs normal
+ * stores, and stores to shadow stack memory are disallowed and will fault.
+ */
+ if (!scs_is_dynamic())
+ memset(s, 0, SCS_SIZE);
goto out;
}
}

s = __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
- GFP_SCS, PAGE_KERNEL, 0, node,
+ GFP_SCS, prot, 0, node,
__builtin_return_address(0));

out:
@@ -59,7 +74,7 @@ void *scs_alloc(int node)
if (!s)
return NULL;

- *__scs_magic(s) = SCS_END_MAGIC;
+ __scs_store_magic(__scs_magic(s), SCS_END_MAGIC);

/*
* Poison the allocation to catch unintentional accesses to
@@ -122,7 +137,12 @@ int scs_prepare(struct task_struct *tsk, int node)
if (!s)
return -ENOMEM;

- task_scs(tsk) = task_scs_sp(tsk) = s;
+ task_scs(tsk) = s;
+ if (scs_is_dynamic())
+ task_scs_sp(tsk) = s + SCS_SIZE;
+ else
+ task_scs_sp(tsk) = s;
+
return 0;
}

--
2.43.2