On Mon, Nov 11, 2024 at 02:22:14PM -0500, Mathieu Desnoyers wrote:
So I'm entirely agreeing with the intent, but perhaps we can argue a
little on the implementation :-)
+#ifdef CONFIG_DEBUG_RSEQ
+static struct rseq *rseq_kernel_fields(struct task_struct *t)
+{
+ return (struct rseq *) t->rseq_fields;
+}
+
+static int rseq_validate_ro_fields(struct task_struct *t)
+{
+ u32 cpu_id_start, cpu_id, node_id, mm_cid;
+ struct rseq __user *rseq = t->rseq;
+
+ /*
+ * Validate fields which are required to be read-only by
+ * user-space.
+ */
+ if (!user_read_access_begin(rseq, t->rseq_len))
+ goto efault;
+ unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
+ unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
+ unsafe_get_user(node_id, &rseq->node_id, efault_end);
+ unsafe_get_user(mm_cid, &rseq->mm_cid, efault_end);
+ user_read_access_end();
+
+ if (cpu_id_start != rseq_kernel_fields(t)->cpu_id_start)
+ printk_ratelimited(KERN_WARNING
+ "Detected rseq cpu_id_start field corruption. Value: %u, expecting: %u (pid=%d).\n",
+ cpu_id_start, rseq_kernel_fields(t)->cpu_id_start, t->pid);
+ if (cpu_id != rseq_kernel_fields(t)->cpu_id)
+ printk_ratelimited(KERN_WARNING
+ "Detected rseq cpu_id field corruption. Value: %u, expecting: %u (pid=%d).\n",
+ cpu_id, rseq_kernel_fields(t)->cpu_id, t->pid);
+ if (node_id != rseq_kernel_fields(t)->node_id)
+ printk_ratelimited(KERN_WARNING
+ "Detected rseq node_id field corruption. Value: %u, expecting: %u (pid=%d).\n",
+ node_id, rseq_kernel_fields(t)->node_id, t->pid);
+ if (mm_cid != rseq_kernel_fields(t)->mm_cid)
+ printk_ratelimited(KERN_WARNING
+ "Detected rseq mm_cid field corruption. Value: %u, expecting: %u (pid=%d).\n",
+ mm_cid, rseq_kernel_fields(t)->mm_cid, t->pid);
So aside from this just being ugly, this also has the problem of getting
the ratelimits out of sync and perhaps only showing partial corruption
for any one task.
Completely untested hackery below.
@@ -423,6 +504,17 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
current->rseq = rseq;
current->rseq_len = rseq_len;
current->rseq_sig = sig;
+#ifdef CONFIG_DEBUG_RSEQ
+ /*
+ * Initialize the in-kernel rseq fields copy for validation of
+ * read-only fields.
+ */
+ if (get_user(rseq_kernel_fields(current)->cpu_id_start, &rseq->cpu_id_start) ||
+ get_user(rseq_kernel_fields(current)->cpu_id, &rseq->cpu_id) ||
+ get_user(rseq_kernel_fields(current)->node_id, &rseq->node_id) ||
+ get_user(rseq_kernel_fields(current)->mm_cid, &rseq->mm_cid))
+ return -EFAULT;
+#endif
So I didn't change the above, but wouldn't it make more sense to do
rseq_reset_rseq_cpu_node_id() here, but without the validation?
---
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1364,6 +1364,15 @@ struct task_struct {
* with respect to preemption.
*/
unsigned long rseq_event_mask;
+# ifdef CONFIG_DEBUG_RSEQ
+ /*
+ * This is a place holder to save a copy of the rseq fields for
+ * validation of read-only fields. The struct rseq has a
+ * variable-length array at the end, so it cannot be used
+ * directly. Reserve a size large enough for the known fields.
+ */
+ char rseq_fields[sizeof(struct rseq)];
+# endif
#endif
#ifdef CONFIG_SCHED_MM_CID
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -25,6 +25,79 @@
RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
+#ifdef CONFIG_DEBUG_RSEQ
+static struct rseq *rseq_kernel_fields(struct task_struct *t)
+{
+ return (struct rseq *) t->rseq_fields;
+}
+
+static int rseq_validate_ro_fields(struct task_struct *t)
+{
+ static DEFINE_RATELIMIT_STATE(_rs,
+ DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ u32 cpu_id_start, cpu_id, node_id, mm_cid;
+ struct rseq __user *rseq = t->rseq;
+
+ /*
+ * Validate fields which are required to be read-only by
+ * user-space.
+ */
+ if (!user_read_access_begin(rseq, t->rseq_len))
+ goto efault;
+ unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
+ unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
+ unsafe_get_user(node_id, &rseq->node_id, efault_end);
+ unsafe_get_user(mm_cid, &rseq->mm_cid, efault_end);
+ user_read_access_end();
+
+ if ((cpu_id_start != rseq_kernel_fields(t)->cpu_id_start ||
+ cpu_id != rseq_kernel_fields(t)->cpu_id ||
+ node_id != rseq_kernel_fields(t)->node_id ||
+ mm_cid != rseq_kernel_fields(t)->mm_cid) && __ratelimit(&_rs)) {
+
+ pr_warn("Detected rseq corruption for pid: %d;\n"
+ " cpu_id_start: %u ?= %u\n"
+ " cpu_id: %u ?= %u\n"
+ " node_id: %u ?= %u\n"
+ " mm_cid: %u ?= %u\n"
+ t->pid,
+ cpu_id_start, rseq_kernel_fields(t)->cpu_id_start,
+ cpu_id, rseq_kernel_fields(t)->cpu_id,
+ node_id, rseq_kernel_fields(t)->node_id,
+ mm_cid, rseq_kernel_fields(t)->mm_cid);
+ }
+
+ /* For now, only print a console warning on mismatch. */
+ return 0;
+
+efault_end:
+ user_read_access_end();
+efault:
+ return -EFAULT;
+}
+
+static void rseq_set_ro_fields(struct task_struct *t, u32 cpu_id_start, u32 cpu_id,
+ u32 node_id, u32 mm_cid)
+{
+ rseq_kernel_fields(t)->cpu_id_start = cpu_id;
+ rseq_kernel_fields(t)->cpu_id = cpu_id;
+ rseq_kernel_fields(t)->node_id = node_id;
+ rseq_kernel_fields(t)->mm_cid = mm_cid;
+}