[RFC PATCH v2 1/2] rseq: x86: implement abort-at-ip extension
From: Mathieu Desnoyers
Date: Mon Jan 10 2022 - 12:16:24 EST
Allow rseq critical section abort handlers to optionally figure out at
which instruction pointer the rseq critical section was aborted.
This allows implementing rseq critical sections with loops containing
the commit instruction, for which having the commit as last instruction
of the sequence is not possible. This is useful to implement adaptative
mutexes aware of preemption in user-space. (see [1])
This also allows implementing rseq critical sections with multiple
commit steps, and use the abort-at-ip information to figure out what
needs to be undone in the abort handler.
Introduce the RSEQ_FLAG_QUERY_ABORT_AT_IP rseq system call flag. This
lets userspace query whether the kernel and architecture supports the
abort-at-ip rseq extension.
Only provide this information for rseq critical sections for which the
rseq_cs descriptor has the RSEQ_CS_FLAG_ABORT_AT_IP flag set. Abort
handlers for critical sections with this flag set need to readjust the
stack pointer. The abort-at-ip pointer is populated by the kernel on
the top of stack on abort. For x86-32, the abort handler of an
abort-at-ip critical section needs to add 4 bytes to the stack pointer.
For x86-64, the abort hanler needs to add 136 bytes to the stack
pointer: 8 bytes to skip the abort-at-ip value, and 128 bytes to skip
the x86-64 redzone for leaf functions.
[1] https://github.com/compudj/rseq-test/blob/adapt-lock-abort-at-ip/test-rseq-adaptative-lock.c#L80
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
---
Changes since v1:
- Use top of stack to place abort-at-ip value rather than ecx/rcx
register,
- Skip redzone on x86-64.
---
arch/x86/include/asm/ptrace.h | 5 +++++
arch/x86/kernel/ptrace.c | 12 ++++++++++++
include/uapi/linux/rseq.h | 4 ++++
kernel/rseq.c | 28 ++++++++++++++++++++++++++++
4 files changed, 49 insertions(+)
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 703663175a5a..c96eb2448110 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -387,5 +387,10 @@ extern int do_set_thread_area(struct task_struct *p, int idx,
# define do_set_thread_area_64(p, s, t) (0)
#endif
+#ifdef CONFIG_RSEQ
+# define RSEQ_ARCH_HAS_ABORT_AT_IP
+int rseq_abort_at_ip(struct pt_regs *regs, unsigned long ip);
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PTRACE_H */
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 6d2244c94799..561ed98d12ba 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1368,3 +1368,15 @@ void user_single_step_report(struct pt_regs *regs)
{
send_sigtrap(regs, 0, TRAP_BRKPT);
}
+
+int rseq_abort_at_ip(struct pt_regs *regs, unsigned long ip)
+{
+ if (user_64bit_mode(regs)) {
+ /* Need to skip redzone for leaf functions. */
+ regs->sp -= sizeof(u64) + 128;
+ return put_user(ip, (u64 __user *)regs->sp);
+ } else {
+ regs->sp -= sizeof(u32);
+ return put_user(ip, (u32 __user *)regs->sp);
+ }
+}
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index 9a402fdb60e9..3130232e6d0c 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -20,12 +20,14 @@ enum rseq_cpu_id_state {
enum rseq_flags {
RSEQ_FLAG_UNREGISTER = (1 << 0),
+ RSEQ_FLAG_QUERY_ABORT_AT_IP = (1 << 1),
};
enum rseq_cs_flags_bit {
RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0,
RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1,
RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2,
+ RSEQ_CS_FLAG_ABORT_AT_IP_BIT = 3,
};
enum rseq_cs_flags {
@@ -35,6 +37,8 @@ enum rseq_cs_flags {
(1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE =
(1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
+ RSEQ_CS_FLAG_ABORT_AT_IP =
+ (1U << RSEQ_CS_FLAG_ABORT_AT_IP_BIT),
};
/*
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 6d45ac3dae7f..fb52f2d11b56 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -21,6 +21,13 @@
#define RSEQ_CS_PREEMPT_MIGRATE_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE | \
RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT)
+#ifdef RSEQ_ARCH_HAS_ABORT_AT_IP
+static bool rseq_has_abort_at_ip(void) { return true; }
+#else
+static bool rseq_has_abort_at_ip(void) { return false; }
+static int rseq_abort_at_ip(struct pt_regs *regs, unsigned long ip) { return 0; }
+#endif
+
/*
*
* Restartable sequences are a lightweight interface that allows
@@ -79,6 +86,16 @@
*
* [abort_ip]
* F1. <failure>
+ *
+ * rseq critical sections defined with the RSEQ_CS_FLAG_ABORT_AT_IP flag
+ * have the following behavior on abort: when the stack grows down: the
+ * stack pointer is decremented to skip the redzone, and decremented of
+ * the pointer size. The aborted address (abort-at-ip) is stored at
+ * this stack pointer location. The user-space abort handler needs to
+ * pop the abort-at-ip address from the stack, and add the redzone size
+ * to the stack pointer.
+ *
+ * TODO: describe stack grows up.
*/
static int rseq_update_cpu_id(struct task_struct *t)
@@ -261,6 +278,11 @@ static int rseq_ip_fixup(struct pt_regs *regs)
trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset,
rseq_cs.abort_ip);
instruction_pointer_set(regs, (unsigned long)rseq_cs.abort_ip);
+ if (rseq_cs.flags & RSEQ_CS_FLAG_ABORT_AT_IP) {
+ ret = rseq_abort_at_ip(regs, ip);
+ if (ret)
+ return ret;
+ }
return 0;
}
@@ -330,6 +352,12 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
{
int ret;
+ if (flags & RSEQ_FLAG_QUERY_ABORT_AT_IP) {
+ if (flags & ~RSEQ_FLAG_QUERY_ABORT_AT_IP)
+ return -EINVAL;
+ return rseq_has_abort_at_ip() ? 0 : -EINVAL;
+ }
+
if (flags & RSEQ_FLAG_UNREGISTER) {
if (flags & ~RSEQ_FLAG_UNREGISTER)
return -EINVAL;
--
2.17.1