[PATCH] rseq: update kernel fields in lockstep with CONFIG_DEBUG_RSEQ

From: Michael Jeanson
Date: Fri Feb 21 2025 - 14:15:10 EST


With CONFIG_DEBUG_RSEQ, an in-kernel copy of the read-only fields is
kept synchronized with the user-space fields. Update each user-space
field and its in-kernel copy in lockstep, so that the two stay coherent
if a write to user-space fails partway through.

Fixes: 7d5265ffcd8b ("rseq: Validate read-only fields under DEBUG_RSEQ config")
Signed-off-by: Michael Jeanson <mjeanson@xxxxxxxxxxxx>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
---
kernel/rseq.c | 85 +++++++++++++++++++++++++++------------------------
1 file changed, 45 insertions(+), 40 deletions(-)

diff --git a/kernel/rseq.c b/kernel/rseq.c
index 2cb16091ec0a..5bdb96944e1f 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -26,6 +26,11 @@
RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)

+static struct rseq __user *rseq_user_fields(struct task_struct *t)
+{
+ return (struct rseq __user *) t->rseq;
+}
+
#ifdef CONFIG_DEBUG_RSEQ
static struct rseq *rseq_kernel_fields(struct task_struct *t)
{
@@ -78,24 +83,24 @@ static int rseq_validate_ro_fields(struct task_struct *t)
return -EFAULT;
}

-static void rseq_set_ro_fields(struct task_struct *t, u32 cpu_id_start, u32 cpu_id,
- u32 node_id, u32 mm_cid)
-{
- rseq_kernel_fields(t)->cpu_id_start = cpu_id;
- rseq_kernel_fields(t)->cpu_id = cpu_id;
- rseq_kernel_fields(t)->node_id = node_id;
- rseq_kernel_fields(t)->mm_cid = mm_cid;
-}
+/*
+ * Update an rseq field and its in-kernel copy in lockstep to keep a coherent
+ * state.
+ */
+#define unsafe_rseq_set_field(t, field, value, error_label) \
+ do { \
+ unsafe_put_user(value, &rseq_user_fields(t)->field, error_label); \
+ rseq_kernel_fields(t)->field = value; \
+ } while (0)
+
#else
static int rseq_validate_ro_fields(struct task_struct *t)
{
return 0;
}

-static void rseq_set_ro_fields(struct task_struct *t, u32 cpu_id_start, u32 cpu_id,
- u32 node_id, u32 mm_cid)
-{
-}
+#define unsafe_rseq_set_field(t, field, value, error_label) \
+ unsafe_put_user(value, &rseq_user_fields(t)->field, error_label)
#endif

/*
@@ -173,17 +178,18 @@ static int rseq_update_cpu_node_id(struct task_struct *t)
WARN_ON_ONCE((int) mm_cid < 0);
if (!user_write_access_begin(rseq, t->rseq_len))
goto efault;
- unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end);
- unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end);
- unsafe_put_user(node_id, &rseq->node_id, efault_end);
- unsafe_put_user(mm_cid, &rseq->mm_cid, efault_end);
+
+ unsafe_rseq_set_field(t, cpu_id_start, cpu_id, efault_end);
+ unsafe_rseq_set_field(t, cpu_id, cpu_id, efault_end);
+ unsafe_rseq_set_field(t, node_id, node_id, efault_end);
+ unsafe_rseq_set_field(t, mm_cid, mm_cid, efault_end);
+
/*
* Additional feature fields added after ORIG_RSEQ_SIZE
* need to be conditionally updated only if
* t->rseq_len != ORIG_RSEQ_SIZE.
*/
user_write_access_end();
- rseq_set_ro_fields(t, cpu_id, cpu_id, node_id, mm_cid);
trace_rseq_update(t);
return 0;

@@ -195,6 +201,7 @@ static int rseq_update_cpu_node_id(struct task_struct *t)

static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
{
+ struct rseq __user *rseq = t->rseq;
u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0,
mm_cid = 0;

@@ -202,38 +209,36 @@ static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
* Validate read-only rseq fields.
*/
if (rseq_validate_ro_fields(t))
- return -EFAULT;
- /*
- * Reset cpu_id_start to its initial state (0).
- */
- if (put_user(cpu_id_start, &t->rseq->cpu_id_start))
- return -EFAULT;
- /*
- * Reset cpu_id to RSEQ_CPU_ID_UNINITIALIZED, so any user coming
- * in after unregistration can figure out that rseq needs to be
- * registered again.
- */
- if (put_user(cpu_id, &t->rseq->cpu_id))
- return -EFAULT;
- /*
- * Reset node_id to its initial state (0).
- */
- if (put_user(node_id, &t->rseq->node_id))
- return -EFAULT;
+ goto efault;
+
+ if (!user_write_access_begin(rseq, t->rseq_len))
+ goto efault;
+
/*
- * Reset mm_cid to its initial state (0).
+ * Reset all fields to their initial state.
+ *
+ * All fields have an initial state of 0 except cpu_id which is set to
+ * RSEQ_CPU_ID_UNINITIALIZED, so that any user coming in after
+ * unregistration can figure out that rseq needs to be registered
+ * again.
*/
- if (put_user(mm_cid, &t->rseq->mm_cid))
- return -EFAULT;
-
- rseq_set_ro_fields(t, cpu_id_start, cpu_id, node_id, mm_cid);
+ unsafe_rseq_set_field(t, cpu_id_start, cpu_id_start, efault_end);
+ unsafe_rseq_set_field(t, cpu_id, cpu_id, efault_end);
+ unsafe_rseq_set_field(t, node_id, node_id, efault_end);
+ unsafe_rseq_set_field(t, mm_cid, mm_cid, efault_end);

/*
* Additional feature fields added after ORIG_RSEQ_SIZE
* need to be conditionally reset only if
* t->rseq_len != ORIG_RSEQ_SIZE.
*/
+ user_write_access_end();
return 0;
+
+efault_end:
+ user_write_access_end();
+efault:
+ return -EFAULT;
}

static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
--
2.43.0