[PATCH 3/4] einj_mem_uc_uaccess: exercise copy_from_user hwpoison recovery via syscalls
From: Ruidong Tian
Date: Tue Jun 16 2026 - 21:53:12 EST
From: Ruidong Tian <ruidong.trd@xxxxxxxxxxxxxxxxx>
The MC-safe copy_from_user (copyin) path has already been validated to
recover from a poisoned source page: the consumed hardware memory error
is contained and reported rather than crashing the kernel. That
baseline, however, only proves the mechanism works on a single code
path.
This patch adds test cases that reach copy_from_user / get_user through a
range of syscall entry points. The goal is not to re-validate copyin
itself, but to demonstrate that the same recovery holds regardless of
which syscall drives the copy -- i.e. the fix applies broadly across the
kernel's uaccess call sites rather than in one isolated path.
Each test poisons a user page and then issues one syscall whose kernel
side copies FROM that page, expecting either SIGBUS delivery or an
-EFAULT return instead of a fatal fault.
Test cases by subsystem:
File I/O: pwrite_uc, writev_uc
Network: send_uc, sendmsg_uc, setsockopt_uc, netlink_send_uc
IPC/sync: msgsnd_uc, mq_send_uc, semop_uc
Memory: process_vm_writev_uc
Signed-off-by: Ruidong Tian <tianruidong@xxxxxxxxxxxxxxxxx>
---
Makefile | 4 +-
einj_mem_uc.c | 16 +-
einj_mem_uc.h | 3 +
einj_mem_uc_uaccess.c | 369 ++++++++++++++++++++++++++++++++++++++++++
4 files changed, 389 insertions(+), 3 deletions(-)
create mode 100644 einj_mem_uc_uaccess.c
diff --git a/Makefile b/Makefile
index 2b5b78d..03d7673 100644
--- a/Makefile
+++ b/Makefile
@@ -34,8 +34,8 @@ rep_ce_page: rep_ce_page.o proc_pagemap.o einj.o
hornet: hornet.o einj.o
$(CC) -o hornet $(CFLAGS) hornet.o einj.o
-einj_mem_uc: einj_mem_uc.o einj_mem_uc_mm.o proc_cpuinfo.o proc_interrupt.o proc_pagemap.o do_memcpy.o einj.o
- $(CC) -o einj_mem_uc $(CFLAGS) einj_mem_uc.o einj_mem_uc_mm.o proc_cpuinfo.o proc_interrupt.o proc_pagemap.o do_memcpy.o einj.o -pthread
+einj_mem_uc: einj_mem_uc.o einj_mem_uc_mm.o einj_mem_uc_uaccess.o proc_cpuinfo.o proc_interrupt.o proc_pagemap.o do_memcpy.o einj.o
+ $(CC) -o einj_mem_uc $(CFLAGS) einj_mem_uc.o einj_mem_uc_mm.o einj_mem_uc_uaccess.o proc_cpuinfo.o proc_interrupt.o proc_pagemap.o do_memcpy.o einj.o -pthread -lrt
lmce: proc_pagemap.o lmce.o
$(CC) -o lmce $(CFLAGS) proc_pagemap.o lmce.o -pthread
diff --git a/einj_mem_uc.c b/einj_mem_uc.c
index a6471af..aea795c 100644
--- a/einj_mem_uc.c
+++ b/einj_mem_uc.c
@@ -1079,6 +1079,7 @@ static void show_help(void)
printf(" %-24s %-5s %s\n", "Testname", "Fatal", "Description");
show_test_array(tests, 0, NULL);
show_test_array(mm_tests, mm_tests_count, "MM subsystem (hwpoison recovery)");
+ show_test_array(uaccess_tests, uaccess_tests_count, "uaccess (copy_from_user direction)");
exit(0);
}
@@ -1108,6 +1109,9 @@ static struct test *lookup_test(char *s)
if (t)
return t;
t = lookup_in(mm_tests, mm_tests_count, s);
+ if (t)
+ return t;
+ t = lookup_in(uaccess_tests, uaccess_tests_count, s);
if (t)
return t;
fprintf(stderr, "%s: unknown test '%s'\n", progname, s);
@@ -1118,7 +1122,7 @@ static struct test *next_test(struct test *t)
{
/*
* Walk: tests[] (NULL-terminated) -> mm_tests[] (counted) ->
- * wrap to tests[].
+ * uaccess_tests[] (counted) -> wrap to tests[].
*/
if (t >= tests && (!t->testname || t[1].testname)) {
t++;
@@ -1126,12 +1130,22 @@ static struct test *next_test(struct test *t)
return t;
if (mm_tests_count)
return &mm_tests[0];
+ if (uaccess_tests_count)
+ return &uaccess_tests[0];
return tests;
}
if (mm_tests_count && t >= mm_tests && t < mm_tests + mm_tests_count) {
t++;
if (t < mm_tests + mm_tests_count)
return t;
+ if (uaccess_tests_count)
+ return &uaccess_tests[0];
+ return tests;
+ }
+ if (uaccess_tests_count && t >= uaccess_tests && t < uaccess_tests + uaccess_tests_count) {
+ t++;
+ if (t < uaccess_tests + uaccess_tests_count)
+ return t;
return tests;
}
return tests;
diff --git a/einj_mem_uc.h b/einj_mem_uc.h
index c2d6c32..1f0487a 100644
--- a/einj_mem_uc.h
+++ b/einj_mem_uc.h
@@ -66,4 +66,7 @@ void skip_test(const char *msg) __attribute__((noreturn));
extern struct test mm_tests[];
extern int mm_tests_count;
+extern struct test uaccess_tests[];
+extern int uaccess_tests_count;
+
#endif /* EINJ_MEM_UC_H */
diff --git a/einj_mem_uc_uaccess.c b/einj_mem_uc_uaccess.c
new file mode 100644
index 0000000..9840938
--- /dev/null
+++ b/einj_mem_uc_uaccess.c
@@ -0,0 +1,369 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * einj_mem_uc_uaccess.c - extension tests that exercise the kernel's
+ * copy_from_user / get_user paths from a variety of syscall entry points.
+ *
+ * All tests in this file belong to the "uaccess" category: the kernel
+ * copies FROM the poisoned user buffer into kernel space. The expected
+ * recovery is SIGBUS delivery or -EFAULT errno.
+ *
+ * Each test reuses the existing page allocators data_alloc / map_file_alloc
+ * from einj_mem_uc.c: the returned virtual address is what will be poisoned,
+ * and the trigger function issues one syscall whose kernel side copies
+ * from that address.
+ */
+
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <signal.h>
+#include <time.h>
+#include <sched.h>
+#include <poll.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+#include <sys/msg.h>
+#include <sys/sem.h>
+#include <sys/ipc.h>
+#include <sys/wait.h>
+
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include <mqueue.h>
+
+#include "einj.h"
+#include "einj_mem_uc.h"
+
+/*
+ * SCRATCH_LEN is the transfer length used by several triggers. The handles
+ * below are created lazily by the triggers and shared between them; -1 means
+ * "not created yet". uaccess_cleanup() releases whatever was created.
+ */
+#define SCRATCH_LEN 256
+static int g_sock_pair[2] = { -1, -1 };	/* AF_UNIX SOCK_STREAM socketpair */
+static int g_tmp_fd = -1;	/* unlinked temporary file */
+static int g_nl_fd = -1;	/* NETLINK_ROUTE socket */
+static int g_mq_fd = -1;	/* POSIX message queue "/einj_mq" */
+static int g_msq_id = -1;	/* SysV message queue id */
+static int g_sem_id = -1;	/* SysV semaphore set id */
+
+/* Close *fd when it holds a non-negative descriptor, then mark it as -1. */
+static void close_if(int *fd)
+{
+	if (*fd < 0)
+		return;
+
+	close(*fd);
+	*fd = -1;
+}
+
+/*
+ * Release every lazily created resource: the plain descriptors, the POSIX
+ * message queue (closed and unlinked), the SysV message queue and the SysV
+ * semaphore set. Each handle is reset to -1 once released.
+ */
+static void uaccess_cleanup(void)
+{
+	int *plain_fds[] = { &g_sock_pair[0], &g_sock_pair[1], &g_tmp_fd, &g_nl_fd };
+	size_t i;
+
+	for (i = 0; i < sizeof(plain_fds) / sizeof(plain_fds[0]); i++)
+		close_if(plain_fds[i]);
+
+	if (g_mq_fd != -1) {
+		mq_close(g_mq_fd);
+		mq_unlink("/einj_mq");
+		g_mq_fd = -1;
+	}
+
+	if (g_msq_id != -1) {
+		msgctl(g_msq_id, IPC_RMID, NULL);
+		g_msq_id = -1;
+	}
+
+	if (g_sem_id != -1) {
+		semctl(g_sem_id, 0, IPC_RMID);
+		g_sem_id = -1;
+	}
+}
+
+/* Pass uaccess_cleanup() to register_cleanup() on the first call only. */
+static void ensure_cleanup_registered(void)
+{
+	static int registered;
+
+	if (registered)
+		return;
+
+	register_cleanup(uaccess_cleanup);
+	registered = 1;
+}
+
+/*
+ * Create a temporary file with mkstemp() under /tmp and unlink its name
+ * right away, leaving only the descriptor.
+ * Returns the descriptor, or mkstemp()'s negative result on failure.
+ */
+static int open_tmpfile(void)
+{
+	char tmpl[] = "/tmp/einj-ua-XXXXXX";
+	int fd;
+
+	fd = mkstemp(tmpl);
+	if (fd < 0)
+		return fd;
+
+	unlink(tmpl);
+	return fd;
+}
+
+/*
+ * Create the shared AF_UNIX stream socketpair on first use.
+ * Returns 0 when the pair is available, -1 (after printing errno) when
+ * socketpair(2) fails.
+ */
+static int ensure_socketpair(void)
+{
+	ensure_cleanup_registered();
+	if (g_sock_pair[0] >= 0)
+		return 0;
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, g_sock_pair) < 0) {
+		/* Callers return -1 without printing, so report the cause here. */
+		fprintf(stderr, "%s: socketpair failed (errno=%d)\n", progname, errno);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Create the shared temporary file on first use and fill its first 4096
+ * bytes with 0x5a.
+ *
+ * Returns 0 when g_tmp_fd refers to a populated file, -1 otherwise. On any
+ * failure g_tmp_fd is left at -1 so that a later call retries the whole
+ * setup instead of reporting success for a half-initialised file.
+ */
+static int ensure_tmpfile(void)
+{
+	char pad[4096];
+	ssize_t ret;
+
+	ensure_cleanup_registered();
+	if (g_tmp_fd >= 0)
+		return 0;
+
+	g_tmp_fd = open_tmpfile();
+	if (g_tmp_fd < 0) {
+		fprintf(stderr, "%s: mkstemp(tmpfile) failed (errno=%d)\n", progname, errno);
+		return -1;
+	}
+
+	/* pre-populate so the file is not empty */
+	memset(pad, 0x5a, sizeof(pad));
+	ret = pwrite(g_tmp_fd, pad, sizeof(pad), 0);
+	if (ret != (ssize_t)sizeof(pad)) {
+		if (ret < 0)
+			fprintf(stderr, "%s: pwrite(tmpfile) failed (errno=%d)\n", progname, errno);
+		else
+			fprintf(stderr, "%s: pwrite(tmpfile) short write (%zd bytes)\n", progname, ret);
+		/* Drop the descriptor so the next call starts from scratch. */
+		close_if(&g_tmp_fd);
+		return -1;
+	}
+	return 0;
+}
+
+/* ========= File I/O ========= */
+
+/*
+ * pwrite(2) SCRATCH_LEN bytes from addr to offset 0 of the shared temp file.
+ * Returns 0 on success, -1 when setup or the write fails.
+ */
+static int trigger_pwrite_uc(char *addr)
+{
+	PRINT_TRIGGERING;
+
+	if (ensure_tmpfile() < 0)
+		return -1;
+
+	if (pwrite(g_tmp_fd, addr, SCRATCH_LEN, 0) >= 0)
+		return 0;
+
+	fprintf(stderr, "%s: pwrite failed (errno=%d)\n", progname, errno);
+	return -1;
+}
+
+/*
+ * writev(2) two segments to the shared temp file: SCRATCH_LEN bytes from
+ * addr first, followed by a 32-byte zeroed stack buffer.
+ * Returns 0 on success, -1 when setup or the write fails.
+ */
+static int trigger_writev_uc(char *addr)
+{
+	char tail[32] = { 0 };
+	struct iovec segs[2] = {
+		{ .iov_base = addr, .iov_len = SCRATCH_LEN },
+		{ .iov_base = tail, .iov_len = sizeof(tail) },
+	};
+
+	PRINT_TRIGGERING;
+	if (ensure_tmpfile() < 0)
+		return -1;
+
+	if (writev(g_tmp_fd, segs, 2) >= 0)
+		return 0;
+
+	fprintf(stderr, "%s: writev failed (errno=%d)\n", progname, errno);
+	return -1;
+}
+
+/* ========= Network ========= */
+
+/*
+ * Non-blocking send(2) of SCRATCH_LEN bytes from addr on one end of the
+ * shared socketpair.
+ * Returns 0 on success, -1 when setup or the send fails.
+ */
+static int trigger_send_uc(char *addr)
+{
+	PRINT_TRIGGERING;
+
+	if (ensure_socketpair() < 0)
+		return -1;
+
+	if (send(g_sock_pair[0], addr, SCRATCH_LEN, MSG_DONTWAIT) >= 0)
+		return 0;
+
+	fprintf(stderr, "%s: send failed (errno=%d)\n", progname, errno);
+	return -1;
+}
+
+/*
+ * Non-blocking sendmsg(2) on the shared socketpair with a single iovec that
+ * covers SCRATCH_LEN bytes at addr.
+ * Returns 0 on success, -1 when setup or the send fails.
+ */
+static int trigger_sendmsg_uc(char *addr)
+{
+	struct iovec seg = { .iov_base = addr, .iov_len = SCRATCH_LEN };
+	struct msghdr hdr = { .msg_iov = &seg, .msg_iovlen = 1 };
+
+	PRINT_TRIGGERING;
+	if (ensure_socketpair() < 0)
+		return -1;
+
+	if (sendmsg(g_sock_pair[0], &hdr, MSG_DONTWAIT) >= 0)
+		return 0;
+
+	fprintf(stderr, "%s: sendmsg failed (errno=%d)\n", progname, errno);
+	return -1;
+}
+
+/*
+ * setsockopt(2) SO_MARK on the shared socketpair with optval pointing at
+ * addr, so the option value (sizeof(int) bytes) is read from that page.
+ * Returns 0 on success, -1 when setup or the call fails.
+ *
+ * NOTE(review): SO_MARK presumably needs elevated privileges - confirm the
+ * test is always run with them.
+ */
+static int trigger_setsockopt_uc(char *addr)
+{
+	PRINT_TRIGGERING;
+
+	if (ensure_socketpair() < 0)
+		return -1;
+
+	if (setsockopt(g_sock_pair[0], SOL_SOCKET, SO_MARK, addr, sizeof(int)) >= 0)
+		return 0;
+
+	fprintf(stderr, "%s: setsockopt failed (errno=%d)\n", progname, errno);
+	return -1;
+}
+
+/*
+ * sendto(2) sizeof(struct nlmsghdr) bytes from addr on a NETLINK_ROUTE
+ * socket. The socket is created and bound on first use and cached in
+ * g_nl_fd.
+ * Returns 0 on success, -1 (after printing errno) when socket setup or the
+ * send fails.
+ */
+static int trigger_netlink_send_uc(char *addr)
+{
+	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
+	ssize_t ret;
+
+	PRINT_TRIGGERING;
+	ensure_cleanup_registered();
+	if (g_nl_fd < 0) {
+		g_nl_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+		if (g_nl_fd < 0) {
+			/* Report the cause like the bind/sendto paths do. */
+			fprintf(stderr, "%s: socket(nl) failed (errno=%d)\n", progname, errno);
+			return -1;
+		}
+		if (bind(g_nl_fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+			fprintf(stderr, "%s: bind(nl) failed (errno=%d)\n", progname, errno);
+			close(g_nl_fd);
+			g_nl_fd = -1;
+			return -1;
+		}
+	}
+	/* Kernel will copy the header+payload from poisoned page to parse it */
+	ret = sendto(g_nl_fd, addr, sizeof(struct nlmsghdr), 0,
+		     (struct sockaddr *)&sa, sizeof(sa));
+	if (ret < 0) {
+		fprintf(stderr, "%s: sendto(nl) failed (errno=%d)\n", progname, errno);
+		return -1;
+	}
+	return 0;
+}
+
+/* ========= IPC / sync ========= */
+
+/*
+ * msgsnd(2) a 64-byte message whose buffer (mtype followed by the body)
+ * starts at addr. The SysV queue is created on first use and cached in
+ * g_msq_id.
+ * Returns 0 on success, -1 (after printing errno) when queue creation or
+ * the send fails.
+ */
+static int trigger_msgsnd_uc(char *addr)
+{
+	int ret;
+
+	PRINT_TRIGGERING;
+	ensure_cleanup_registered();
+	if (g_msq_id == -1) {
+		g_msq_id = msgget(IPC_PRIVATE, IPC_CREAT | 0600);
+		if (g_msq_id == -1) {
+			/* Report the cause like the msgsnd path does. */
+			fprintf(stderr, "%s: msgget failed (errno=%d)\n", progname, errno);
+			return -1;
+		}
+	}
+	/*
+	 * first long is mtype; poisoned page becomes the message body
+	 *
+	 * NOTE(review): this is a user-space store into the page under test
+	 * before the syscall runs. If the allocator already leaves a positive
+	 * value in the first long, the store could be dropped - confirm.
+	 */
+	((long *)addr)[0] = 1;
+	ret = msgsnd(g_msq_id, addr, 64, IPC_NOWAIT);
+	if (ret < 0) {
+		fprintf(stderr, "%s: msgsnd failed (errno=%d)\n", progname, errno);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * mq_send(3) a 64-byte message taken from addr to the POSIX queue
+ * "/einj_mq". The queue is (re)created non-blocking on first use, with room
+ * for 4 messages of 64 bytes, and cached in g_mq_fd.
+ * Returns 0 on success, -1 (after printing errno) when queue creation or
+ * the send fails.
+ */
+static int trigger_mq_send_uc(char *addr)
+{
+	struct mq_attr at = { .mq_maxmsg = 4, .mq_msgsize = 64 };
+	int ret;
+
+	PRINT_TRIGGERING;
+	ensure_cleanup_registered();
+	if (g_mq_fd == -1) {
+		/* Remove any queue left under this name before creating ours. */
+		mq_unlink("/einj_mq");
+		g_mq_fd = mq_open("/einj_mq", O_CREAT | O_RDWR | O_NONBLOCK,
+				  0600, &at);
+		if (g_mq_fd == -1) {
+			/* Report the cause like the mq_send path does. */
+			fprintf(stderr, "%s: mq_open failed (errno=%d)\n", progname, errno);
+			return -1;
+		}
+	}
+	ret = mq_send(g_mq_fd, addr, 64, 0);
+	if (ret < 0) {
+		fprintf(stderr, "%s: mq_send failed (errno=%d)\n", progname, errno);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * semop(2) with a one-element sembuf array located at addr. The SysV
+ * semaphore set (one semaphore) is created on first use and cached in
+ * g_sem_id.
+ * Returns 0 on success, -1 (after printing errno) when set creation or the
+ * operation fails.
+ */
+static int trigger_semop_uc(char *addr)
+{
+	struct sembuf *sb = (struct sembuf *)addr;
+	int ret;
+
+	PRINT_TRIGGERING;
+	ensure_cleanup_registered();
+	if (g_sem_id == -1) {
+		g_sem_id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
+		if (g_sem_id == -1) {
+			/* Report the cause like the semop path does. */
+			fprintf(stderr, "%s: semget failed (errno=%d)\n", progname, errno);
+			return -1;
+		}
+	}
+	/* kernel copies the sembuf array from user */
+	ret = semop(g_sem_id, sb, 1);
+	if (ret < 0) {
+		fprintf(stderr, "%s: semop failed (errno=%d)\n", progname, errno);
+		return -1;
+	}
+	return 0;
+}
+
+/* ========= Memory ========= */
+
+/*
+ * process_vm_writev(2) targeting the calling process itself: the local iovec
+ * covers SCRATCH_LEN bytes at addr and the remote iovec is a zeroed stack
+ * buffer of the same size.
+ * Returns 0 on success, -1 when the call fails.
+ */
+static int trigger_process_vm_writev_uc(char *addr)
+{
+	char sink[SCRATCH_LEN] = { 0 };
+	struct iovec src = { .iov_base = addr, .iov_len = SCRATCH_LEN };
+	struct iovec dst = { .iov_base = sink, .iov_len = SCRATCH_LEN };
+
+	PRINT_TRIGGERING;
+	if (process_vm_writev(getpid(), &src, 1, &dst, 1, 0) >= 0)
+		return 0;
+
+	fprintf(stderr, "%s: process_vm_writev failed (errno=%d)\n", progname, errno);
+	return -1;
+}
+
+/* ========= Test table ========= */
+
+/*
+ * Build one struct test initializer. Every uaccess entry uses inject_mem_uc
+ * as its fourth field and the constant 1 as its fifth; the caller supplies
+ * the name, help text, allocator, trigger and flags.
+ * NOTE(review): the meaning of the fifth field comes from struct test in
+ * einj_mem_uc.h, which is not visible here - confirm.
+ */
+#define UA(name, help, alloc_fn, trig_fn, fl) \
+ { name, help, alloc_fn, inject_mem_uc, 1, trig_fn, fl }
+
+/*
+ * uaccess test table. All entries allocate with data_alloc and carry the
+ * flags F_MCE|F_CMCI|F_SIGBUS; they differ only in the trigger function.
+ */
+struct test uaccess_tests[] = {
+ /* File I/O */
+ UA("pwrite_uc", "pwrite(2) copies poisoned user buffer into kernel",
+ data_alloc, trigger_pwrite_uc, F_MCE|F_CMCI|F_SIGBUS),
+ UA("writev_uc", "writev(2) iov referencing poisoned page",
+ data_alloc, trigger_writev_uc, F_MCE|F_CMCI|F_SIGBUS),
+
+ /* Network (send direction) */
+ UA("send_uc", "send(2) over AF_UNIX socketpair",
+ data_alloc, trigger_send_uc, F_MCE|F_CMCI|F_SIGBUS),
+ UA("sendmsg_uc", "sendmsg(2) iov referencing poisoned page",
+ data_alloc, trigger_sendmsg_uc, F_MCE|F_CMCI|F_SIGBUS),
+ UA("setsockopt_uc", "setsockopt(2) optval on poisoned page",
+ data_alloc, trigger_setsockopt_uc, F_MCE|F_CMCI|F_SIGBUS),
+ UA("netlink_send_uc", "AF_NETLINK sendto(2) payload on poisoned page",
+ data_alloc, trigger_netlink_send_uc, F_MCE|F_CMCI|F_SIGBUS),
+
+ /* IPC / sync */
+ UA("msgsnd_uc", "SysV msgsnd(2) message body on poisoned page",
+ data_alloc, trigger_msgsnd_uc, F_MCE|F_CMCI|F_SIGBUS),
+ UA("mq_send_uc", "POSIX mq_send(3) body on poisoned page",
+ data_alloc, trigger_mq_send_uc, F_MCE|F_CMCI|F_SIGBUS),
+ UA("semop_uc", "semop(2) sembuf array on poisoned page",
+ data_alloc, trigger_semop_uc, F_MCE|F_CMCI|F_SIGBUS),
+
+ /* Memory */
+ UA("process_vm_writev_uc", "process_vm_writev(2) local iov on poisoned page",
+ data_alloc, trigger_process_vm_writev_uc, F_MCE|F_CMCI|F_SIGBUS),
+};
+
+/* Number of entries in uaccess_tests[]; the table has no NULL terminator. */
+int uaccess_tests_count = (int)(sizeof(uaccess_tests) / sizeof(uaccess_tests[0]));
--
2.39.3