On Fri, Jul 25, 2025 at 04:13:27PM +0000, Sami Tolvanen wrote:
On Thu, Jul 24, 2025 at 04:37:03PM -0700, Deepak Gupta wrote:
If the shadow stack has memory protections from the underlying CPU, use those
protections. Arches can define PAGE_KERNEL_SHADOWSTACK to vmalloc such shadow
stack pages. Hardware-assisted shadow stack pages grow downwards like the regular
stack. The Clang-based software shadow call stack grows from low to high addresses.
Is this the case for all the current hardware shadow stack
implementations? If not, we might want a separate config for the
shadow stack direction instead.
Is there something like this for the regular stack as well?
I could copy the same mechanism.
Thus this patch addresses some of the needs that arise from the opposite growth
direction of the shadow stack. Furthermore, a hw shadow stack can't be memset
because memset uses normal stores. Lastly, to store the magic word at the base of
the shadow stack, an arch-specific shadow stack store has to be performed.
Signed-off-by: Deepak Gupta <debug@xxxxxxxxxxxx>
---
include/linux/scs.h | 26 +++++++++++++++++++++++++-
kernel/scs.c | 38 +++++++++++++++++++++++++++++++++++---
2 files changed, 60 insertions(+), 4 deletions(-)
diff --git a/include/linux/scs.h b/include/linux/scs.h
index 4ab5bdc898cf..6ceee07c2d1a 100644
--- a/include/linux/scs.h
+++ b/include/linux/scs.h
@@ -12,6 +12,7 @@
#include <linux/poison.h>
#include <linux/sched.h>
#include <linux/sizes.h>
+#include <asm/scs.h>
#ifdef CONFIG_SHADOW_CALL_STACK
@@ -37,22 +38,45 @@ static inline void scs_task_reset(struct task_struct *tsk)
* Reset the shadow stack to the base address in case the task
* is reused.
*/
+#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
+ task_scs_sp(tsk) = task_scs(tsk) + SCS_SIZE;
+#else
task_scs_sp(tsk) = task_scs(tsk);
+#endif
}
static inline unsigned long *__scs_magic(void *s)
{
+#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
+ return (unsigned long *)(s);
+#else
return (unsigned long *)(s + SCS_SIZE) - 1;
+#endif
}
static inline bool task_scs_end_corrupted(struct task_struct *tsk)
{
unsigned long *magic = __scs_magic(task_scs(tsk));
- unsigned long sz = task_scs_sp(tsk) - task_scs(tsk);
+ unsigned long sz;
+
+#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
+ sz = (task_scs(tsk) + SCS_SIZE) - task_scs_sp(tsk);
+#else
+ sz = task_scs_sp(tsk) - task_scs(tsk);
+#endif
return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
}
+static inline void __scs_store_magic(unsigned long *s, unsigned long magic_val)
+{
+#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
+ arch_scs_store(s, magic_val);
+#else
+ *__scs_magic(s) = magic_val;
+#endif
+}
+
I'm not a huge fan of all the ifdefs. We could clean this up by
allowing architectures to simply override some of these functions, or at
least use if (IS_ENABLED(CONFIG...)) instead. Will, any thoughts about
this?
DECLARE_STATIC_KEY_FALSE(dynamic_scs_enabled);
static inline bool scs_is_dynamic(void)
diff --git a/kernel/scs.c b/kernel/scs.c
index d7809affe740..5910c0a8eabd 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -11,6 +11,7 @@
#include <linux/scs.h>
#include <linux/vmalloc.h>
#include <linux/vmstat.h>
+#include <asm-generic/set_memory.h>
#ifdef CONFIG_DYNAMIC_SCS
DEFINE_STATIC_KEY_FALSE(dynamic_scs_enabled);
@@ -32,19 +33,31 @@ static void *__scs_alloc(int node)
{
int i;
void *s;
+ pgprot_t prot = PAGE_KERNEL;
+
+#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
+ prot = PAGE_KERNEL_SHADOWSTACK;
+#endif
I would rather define the shadow stack protection flags in the header
file and allow them to be overridden in asm/scs.h.
for (i = 0; i < NR_CACHED_SCS; i++) {
s = this_cpu_xchg(scs_cache[i], NULL);
if (s) {
s = kasan_unpoison_vmalloc(s, SCS_SIZE,
KASAN_VMALLOC_PROT_NORMAL);
+/*
+ * If software shadow stack, it's safe to memset. Else memset is not
+ * possible on hw protected shadow stack. memset constitutes stores and
+ * stores to shadow stack memory are disallowed and will fault.
+ */
+#ifndef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
memset(s, 0, SCS_SIZE);
+#endif
This could also be moved to a static inline function that
architectures can override if they have hardware shadow stacks that
cannot be cleared at this point.
goto out;
}
}
s = __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
- GFP_SCS, PAGE_KERNEL, 0, node,
+ GFP_SCS, prot, 0, node,
__builtin_return_address(0));
out:
@@ -59,7 +72,7 @@ void *scs_alloc(int node)
if (!s)
return NULL;
- *__scs_magic(s) = SCS_END_MAGIC;
+ __scs_store_magic(__scs_magic(s), SCS_END_MAGIC);
/*
* Poison the allocation to catch unintentional accesses to
@@ -87,6 +100,16 @@ void scs_free(void *s)
return;
kasan_unpoison_vmalloc(s, SCS_SIZE, KASAN_VMALLOC_PROT_NORMAL);
+ /*
+ * Hardware protected shadow stack is not writeable by regular stores
+ * Thus adding this back to free list will raise faults by vmalloc
+ * It needs to be writeable again. It's good sanity as well because
+ * then it can't be inadvertently accessed and if done, it will fault.
+ */
+#ifdef CONFIG_ARCH_HAS_KERNEL_SHADOW_STACK
+ set_memory_rw((unsigned long)s, (SCS_SIZE/PAGE_SIZE));
+#endif
Another candidate for an arch-specific function to reduce the number
of ifdefs in the generic code.
Sami