[PATCH] x86/shstk: Free the thread shadow stack before disassociating from the mm

From: Mark Brown
Date: Tue Sep 10 2024 - 19:01:11 EST


When using shadow stacks the kernel will transparently allocate a shadow
stack for each thread. The intention is that this will be freed when the
thread exits but currently this doesn't actually happen. The deallocation
is done by shstk_free() which is called from exit_thread() and has a guard
to check for !tsk->mm due to the use of vm_unmap(). This doesn't actually
do anything since in do_exit() we call exit_mm() prior to thread_exit() and
exit_mm() disassociates the task from the mm and clears tsk->mm. The result
is that no shadow stacks will be freed until the process exits, leaking
memory for any process which creates and destroys threads.

Fix this by moving the shstk_free() to a new exit_thread_early() call which
runs immediately prior to exit_mm(). We don't do this right at the start of
do_exit() due to the handling for klling init. This could trigger some
incompatibility if there is code that looks at the shadow stack of a thread
which has exited but this seems much less likely than the leaking of shadow
stacks due to thread deletion.

Fixes: 18e66b695e78 ("x86/shstk: Add Kconfig option for shadow stack")
Signed-off-by: Mark Brown <broonie@xxxxxxxxxx>
---
It is entirely possible I am missing something here, I don't have a
system that allows me to test shadow stack support directly and have
only checked this by inspection and tested with my arm64 GCS series.
If this makes sense it'll need to become a dependency for GCS.
---
arch/Kconfig | 5 +++++
arch/x86/Kconfig | 1 +
arch/x86/kernel/process.c | 9 ++++++++-
include/linux/sched/task.h | 7 +++++++
kernel/exit.c | 2 ++
5 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 975dd22a2dbd..2fa3aedc1d23 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1037,6 +1037,11 @@ config HAVE_EXIT_THREAD
help
An architecture implements exit_thread.

+config HAVE_EXIT_THREAD_EARLY
+ bool
+ help
+ An architecture implements exit_thread_early.
+
config ARCH_MMAP_RND_BITS_MIN
int

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 007bab9f2a0e..916616026111 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -225,6 +225,7 @@ config X86
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EISA
select HAVE_EXIT_THREAD
+ select HAVE_EXIT_THREAD_EARLY
select HAVE_GUP_FAST
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index f63f8fd00a91..af3cabe08185 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -110,6 +110,14 @@ void arch_release_task_struct(struct task_struct *tsk)
}
#endif

+/*
+ * Free thread data structures etc..
+ */
+void exit_thread_early(struct task_struct *tsk)
+{
+ shstk_free(tsk);
+}
+
/*
* Free thread data structures etc..
*/
@@ -123,7 +131,6 @@ void exit_thread(struct task_struct *tsk)

free_vm86(t);

- shstk_free(tsk);
fpu__drop(fpu);
}

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index d362aacf9f89..cbafc6ba4e3e 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -88,6 +88,13 @@ static inline void exit_thread(struct task_struct *tsk)
{
}
#endif
+#ifdef CONFIG_HAVE_EXIT_THREAD_EARLY
+extern void exit_thread_early(struct task_struct *tsk);
+#else
+static inline void exit_thread_early(struct task_struct *tsk)
+{
+}
+#endif
extern __noreturn void do_group_exit(int);

extern void exit_files(struct task_struct *);
diff --git a/kernel/exit.c b/kernel/exit.c
index 7430852a8571..bc2c2a3da9fb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -866,6 +866,8 @@ void __noreturn do_exit(long code)
tsk->exit_code = code;
taskstats_exit(tsk, group_dead);

+ exit_thread_early(tsk);
+
exit_mm();

if (group_dead)

---
base-commit: 7c626ce4bae1ac14f60076d00eafe71af30450ba
change-id: 20240910-x86-fix-shstk-leak-add636c1d0e4

Best regards,
--
Mark Brown <broonie@xxxxxxxxxx>