Re: [PATCH v6 1/5] x86/pkeys: Add PKRU as a parameter in signal handling functions

From: Jeff Xu
Date: Wed Jul 17 2024 - 00:26:17 EST


On Thu, Jun 27, 2024 at 2:17 PM Aruna Ramakrishna
<aruna.ramakrishna@xxxxxxxxxx> wrote:
>
> Problem description:
> Let's assume there's a multithreaded application that runs untrusted
> user code. Each thread has its stack/code protected by a non-zero pkey,
> and the PKRU register is set up such that only that particular non-zero
> pkey is enabled. Each thread also sets up an alternate signal stack to
> handle signals, which is protected by pkey zero. The pkeys man page
> documents that the PKRU will be reset to init_pkru when the signal
> handler is invoked, which means that pkey zero access will be enabled.
> But this reset happens after the kernel attempts to push fpu state
> to the alternate stack, which is not (yet) accessible by the kernel,
> which leads to a new SIGSEGV being sent to the application, terminating
> it.
>
> Enabling both the non-zero pkey (for the thread) and pkey zero in
> userspace will not work for this use case. We cannot have the alt stack
> writeable by all - the rationale here is that the code running in that
> thread (using a non-zero pkey) is untrusted and should not have access
> to the alternate signal stack (that uses pkey zero), to prevent the
> return address of a function from being changed. The expectation is that
> kernel should be able to set up the alternate signal stack and deliver
> the signal to the application even if pkey zero is explicitly disabled
> by the application. The signal handler accessibility should not be
> dictated by whatever PKRU value the thread sets up.
>
> Solution:
> The PKRU register is managed by XSAVE, which means the sigframe contents
> must match the register contents - which is not the case here. We want
> the sigframe to contain the user-defined PKRU value (so that it is
> restored correctly from sigcontext) but the actual register must be
> reset to init_pkru so that the alt stack is accessible and the signal
> can be delivered to the application. It seems that the proper fix here
> would be to remove PKRU from the XSAVE framework and manage it
> separately, which is quite complicated. As a workaround, do this:
>
> orig_pkru = rdpkru();
> wrpkru(orig_pkru & init_pkru_value);
> xsave_to_user_sigframe();
> put_user(pkru_sigframe_addr, orig_pkru)
>
> This change is split over multiple patches.
>
> In preparation for writing PKRU to sigframe in a later patch, pass in
> PKRU as an additional parameter down the chain from handle_signal:
> setup_rt_frame()
> xxx_setup_rt_frame()
Above two functions don't access altstack, therefore we can keep it
the same as before.

> get_sigframe()
> copy_fpstate_to_sigframe()
> copy_fpregs_to_sigframe()
>
> There are no functional changes in this patch.
>
> Signed-off-by: Aruna Ramakrishna <aruna.ramakrishna@xxxxxxxxxx>
> ---
> arch/x86/include/asm/fpu/signal.h | 2 +-
> arch/x86/include/asm/sighandling.h | 10 +++++-----
> arch/x86/kernel/fpu/signal.c | 6 +++---
> arch/x86/kernel/signal.c | 19 ++++++++++---------
> arch/x86/kernel/signal_32.c | 8 ++++----
> arch/x86/kernel/signal_64.c | 8 ++++----
> 6 files changed, 27 insertions(+), 26 deletions(-)
>
> diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
> index 611fa41711af..eccc75bc9c4f 100644
> --- a/arch/x86/include/asm/fpu/signal.h
> +++ b/arch/x86/include/asm/fpu/signal.h
> @@ -29,7 +29,7 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
>
> unsigned long fpu__get_fpstate_size(void);
>
> -extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
> +extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size, u32 pkru);
> extern void fpu__clear_user_states(struct fpu *fpu);
> extern bool fpu__restore_sig(void __user *buf, int ia32_frame);
>
> diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
> index e770c4fc47f4..de458354a3ea 100644
> --- a/arch/x86/include/asm/sighandling.h
> +++ b/arch/x86/include/asm/sighandling.h
> @@ -17,11 +17,11 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
>
> void __user *
> get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size,
> - void __user **fpstate);
> + void __user **fpstate, u32 pkru);
>
> -int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs);
> -int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
> -int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
> -int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs);
> +int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru);
> +int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru);
> +int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru);
> +int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru);
>
> #endif /* _ASM_X86_SIGHANDLING_H */
> diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
> index 247f2225aa9f..2b3b9e140dd4 100644
> --- a/arch/x86/kernel/fpu/signal.c
> +++ b/arch/x86/kernel/fpu/signal.c
> @@ -156,7 +156,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
> return !err;
> }
>
> -static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
> +static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru)
> {
> if (use_xsave())
> return xsave_to_user_sigframe(buf);
> @@ -185,7 +185,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
> * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
> * indicating the absence/presence of the extended state to the user.
> */
> -bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
> +bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size, u32 pkru)
> {
> struct task_struct *tsk = current;
> struct fpstate *fpstate = tsk->thread.fpu.fpstate;
> @@ -228,7 +228,7 @@ bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
> fpregs_restore_userregs();
>
> pagefault_disable();
> - ret = copy_fpregs_to_sigframe(buf_fx);
> + ret = copy_fpregs_to_sigframe(buf_fx, pkru);
> pagefault_enable();
> fpregs_unlock();
>
> diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
> index 31b6f5dddfc2..94b894437327 100644
> --- a/arch/x86/kernel/signal.c
> +++ b/arch/x86/kernel/signal.c
> @@ -74,7 +74,7 @@ static inline int is_x32_frame(struct ksignal *ksig)
> */
> void __user *
> get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size,
> - void __user **fpstate)
> + void __user **fpstate, u32 pkru)
we can keep the signature the same, i.e. not adding pkru.

> {
> struct k_sigaction *ka = &ksig->ka;
> int ia32_frame = is_ia32_frame(ksig);
> @@ -139,7 +139,7 @@ get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size,
> }
>
> /* save i387 and extended state */
> - if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size))
> + if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size, pkru))
> return (void __user *)-1L;
>
> return (void __user *)sp;
> @@ -206,7 +206,7 @@ unsigned long get_sigframe_size(void)
> }
>
> static int
> -setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
> +setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru)
> {
> /* Perform fixup for the pre-signal frame. */
> rseq_signal_deliver(ksig, regs);
> @@ -214,21 +214,22 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
> /* Set up the stack frame */
> if (is_ia32_frame(ksig)) {
> if (ksig->ka.sa.sa_flags & SA_SIGINFO)
> - return ia32_setup_rt_frame(ksig, regs);
> + return ia32_setup_rt_frame(ksig, regs, pkru);
> else
> - return ia32_setup_frame(ksig, regs);
> + return ia32_setup_frame(ksig, regs, pkru);
> } else if (is_x32_frame(ksig)) {
> - return x32_setup_rt_frame(ksig, regs);
> + return x32_setup_rt_frame(ksig, regs, pkru);
> } else {
> - return x64_setup_rt_frame(ksig, regs);
> + return x64_setup_rt_frame(ksig, regs, pkru);
> }
> }
>
> static void
> handle_signal(struct ksignal *ksig, struct pt_regs *regs)
> {
> - bool stepping, failed;
> struct fpu *fpu = &current->thread.fpu;
> + u32 pkru = read_pkru();
This can be moved to get_sigframe(), the same for setting pkru=0

> + bool stepping, failed;
>
> if (v8086_mode(regs))
> save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL);
> @@ -264,7 +265,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
> if (stepping)
> user_disable_single_step(current);
>
> - failed = (setup_rt_frame(ksig, regs) < 0);
> + failed = (setup_rt_frame(ksig, regs, pkru) < 0);
The failure case can be handled in get_sigframe().

> if (!failed) {
> /*
> * Clear the direction flag as per the ABI for function entry.
> diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
> index ef654530bf5a..b437d02ecfd7 100644
> --- a/arch/x86/kernel/signal_32.c
> +++ b/arch/x86/kernel/signal_32.c
no change to signal_64.c if you keep pkru inside get_sigframe.

> @@ -228,7 +228,7 @@ do { \
> goto label; \
> } while(0)
>
> -int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs)
> +int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru)
ia32 doesn't support pkru iiuc, so no need to change the signature here.
Same comments for x32 related code path.

> {
> sigset32_t *set = (sigset32_t *) sigmask_to_save();
> struct sigframe_ia32 __user *frame;
> @@ -246,7 +246,7 @@ int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs)
> 0x80cd, /* int $0x80 */
> };
>
> - frame = get_sigframe(ksig, regs, sizeof(*frame), &fp);
> + frame = get_sigframe(ksig, regs, sizeof(*frame), &fp, pkru);
>
> if (ksig->ka.sa.sa_flags & SA_RESTORER) {
> restorer = ksig->ka.sa.sa_restorer;
> @@ -299,7 +299,7 @@ int ia32_setup_frame(struct ksignal *ksig, struct pt_regs *regs)
> return -EFAULT;
> }
>
> -int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
> +int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru)
> {
> sigset32_t *set = (sigset32_t *) sigmask_to_save();
> struct rt_sigframe_ia32 __user *frame;
> @@ -319,7 +319,7 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
> 0,
> };
>
> - frame = get_sigframe(ksig, regs, sizeof(*frame), &fp);
> + frame = get_sigframe(ksig, regs, sizeof(*frame), &fp, pkru);
>
> if (!user_access_begin(frame, sizeof(*frame)))
> return -EFAULT;
> diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
> index 8a94053c5444..ccfb7824ab2c 100644
> --- a/arch/x86/kernel/signal_64.c
> +++ b/arch/x86/kernel/signal_64.c
no change to signal_64.c if you keep pkru inside get_sigframe.

> @@ -161,7 +161,7 @@ static unsigned long frame_uc_flags(struct pt_regs *regs)
> return flags;
> }
>
> -int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
> +int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru)
no change to this function, because it doesn't access altstack.

> {
> sigset_t *set = sigmask_to_save();
> struct rt_sigframe __user *frame;
> @@ -172,7 +172,7 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
> if (!(ksig->ka.sa.sa_flags & SA_RESTORER))
> return -EFAULT;
>
> - frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp);
> + frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp, pkru);
> uc_flags = frame_uc_flags(regs);
>
> if (!user_access_begin(frame, sizeof(*frame)))
> @@ -300,7 +300,7 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to,
> return __copy_siginfo_to_user32(to, from);
> }
>
> -int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
> +int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs, u32 pkru)
> {
> compat_sigset_t *set = (compat_sigset_t *) sigmask_to_save();
> struct rt_sigframe_x32 __user *frame;
> @@ -311,7 +311,7 @@ int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
> if (!(ksig->ka.sa.sa_flags & SA_RESTORER))
> return -EFAULT;
>
> - frame = get_sigframe(ksig, regs, sizeof(*frame), &fp);
> + frame = get_sigframe(ksig, regs, sizeof(*frame), &fp, pkru);
>
> uc_flags = frame_uc_flags(regs);
>
> --
> 2.39.3
>