[PATCH v3 2/2] tests: Add CABA selftest

From: Pavel Tikhomirov
Date: Thu Sep 08 2022 - 10:04:02 EST


This test creates a "tricky" example process tree in which the leaders of
two sessions are both children of the pid namespace init, and each leader
has a child that belongs to the other session: the leader of session A has
a child in session B, and the leader of session B has a child in session A.

We check that the Closest Alive Born Ancestor (CABA) tree is correct for
this case. The case illustrates how the CABA tree helps to determine the
order in which the sessions were created.

CC: Eric Biederman <ebiederm@xxxxxxxxxxxx>
CC: Kees Cook <keescook@xxxxxxxxxxxx>
CC: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx>
CC: Ingo Molnar <mingo@xxxxxxxxxx>
CC: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CC: Juri Lelli <juri.lelli@xxxxxxxxxx>
CC: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
CC: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
CC: Steven Rostedt <rostedt@xxxxxxxxxxx>
CC: Ben Segall <bsegall@xxxxxxxxxx>
CC: Mel Gorman <mgorman@xxxxxxx>
CC: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
CC: Valentin Schneider <vschneid@xxxxxxxxxx>
CC: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
CC: linux-ia64@xxxxxxxxxxxxxxx
CC: linux-kernel@xxxxxxxxxxxxxxx
CC: linux-mm@xxxxxxxxx
CC: linux-fsdevel@xxxxxxxxxxxxxxx
CC: kernel@xxxxxxxxxx

Signed-off-by: Pavel Tikhomirov <ptikhomirov@xxxxxxxxxxxxx>

--
v3: fix coding style
---
tools/testing/selftests/Makefile | 1 +
tools/testing/selftests/caba/.gitignore | 1 +
tools/testing/selftests/caba/Makefile | 7 +
tools/testing/selftests/caba/caba_test.c | 509 +++++++++++++++++++++++
tools/testing/selftests/caba/config | 1 +
5 files changed, 519 insertions(+)
create mode 100644 tools/testing/selftests/caba/.gitignore
create mode 100644 tools/testing/selftests/caba/Makefile
create mode 100644 tools/testing/selftests/caba/caba_test.c
create mode 100644 tools/testing/selftests/caba/config

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c2064a35688b..d545bd9e3637 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -3,6 +3,7 @@ TARGETS += alsa
TARGETS += arm64
TARGETS += bpf
TARGETS += breakpoints
+TARGETS += caba
TARGETS += capabilities
TARGETS += cgroup
TARGETS += clone3
diff --git a/tools/testing/selftests/caba/.gitignore b/tools/testing/selftests/caba/.gitignore
new file mode 100644
index 000000000000..aa2c55b774e2
--- /dev/null
+++ b/tools/testing/selftests/caba/.gitignore
@@ -0,0 +1 @@
+caba_test
diff --git a/tools/testing/selftests/caba/Makefile b/tools/testing/selftests/caba/Makefile
new file mode 100644
index 000000000000..4260145c3747
--- /dev/null
+++ b/tools/testing/selftests/caba/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for caba selftests.
+CFLAGS = -g -I../../../../usr/include/ -Wall -O2
+
+TEST_GEN_FILES += caba_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/caba/caba_test.c b/tools/testing/selftests/caba/caba_test.c
new file mode 100644
index 000000000000..a89c4b96393b
--- /dev/null
+++ b/tools/testing/selftests/caba/caba_test.c
@@ -0,0 +1,509 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sched.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/user.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef CLONE_NEWPID
+#define CLONE_NEWPID 0x20000000 /* New pid namespace */
+#endif
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
+struct process { /* Per-process bookkeeping slot, indexed by test-local id */
+ pid_t pid; /* clone() return value stored by make_child() */
+ pid_t real; /* pid parsed from the /proc/self link by get_real_pid() */
+ pid_t caba; /* not referenced by the code in this file */
+ int sks[2]; /* socketpair: [0] read in handle_command(), [1] used by send_command() */
+ int dead; /* set to 1 by the TEST_DIE handler before exiting */
+};
+
+struct process *processes; /* slot table; mapped MAP_SHARED by the test body */
+int nr_processes = 8; /* number of slots that get a socketpair */
+int current; /* slot id owned by the running process, set in clone_func() */
+
+/*
+ * Kill the top-level helper (slot 0) and the pid namespace init (slot 1).
+ *
+ * A slot whose pid is still 0 (never forked) or -1 (clone() failed) must be
+ * skipped: kill(0, ...) signals the caller's whole process group and
+ * kill(-1, ...) signals every process the caller is allowed to signal.
+ */
+static void cleanup(void)
+{
+	if (processes[0].pid > 0)
+		kill(processes[0].pid, SIGKILL);
+	/* It's enough to kill pidns init for others to die */
+	if (processes[1].pid > 0)
+		kill(processes[1].pid, SIGKILL);
+}
+
+enum commands { /* Requests dispatched by handle_command() */
+ TEST_FORK, /* make_child(arg1, arg2): arg1 is the new slot id, arg2 the clone flags */
+ TEST_WAIT, /* waitpid() on the pid stored in slot arg1 */
+ TEST_SUBREAPER, /* prctl(PR_SET_CHILD_SUBREAPER, arg1) */
+ TEST_SETSID, /* setsid() */
+ TEST_DIE, /* mark the slot dead and exit(0) without answering */
+ /* unused */
+ TEST_GETSID, /* answers with getsid(getpid()) */
+ TEST_SETNS, /* setns_pid() with the pid of slot arg1 and nstype arg2 */
+ TEST_SETPGID, /* setpgid() using the pids stored in slots arg1 and arg2 */
+ TEST_GETPGID, /* answers with getpgid(0) */
+ TEST_GETPPID, /* answers with getppid() */
+};
+
+struct command { /* Message written by send_command() and read by handle_command() */
+ enum commands cmd; /* which request to run */
+ int arg1; /* first operand; meaning depends on cmd */
+ int arg2; /* second operand; meaning depends on cmd */
+};
+
+static void handle_command(void);
+
+/* Serve commands forever; handle_command() exits the process when needed. */
+static void mainloop(void)
+{
+	for (;;)
+		handle_command();
+}
+
+#define CLONE_STACK_SIZE 4096 /* bytes reserved for the child stack */
+#define __stack_aligned__ __attribute__((aligned(16)))
+/* All arguments should be above stack, because it grows down */
+struct clone_args {
+ char stack[CLONE_STACK_SIZE] __stack_aligned__; /* child stack storage */
+ char stack_ptr[0]; /* zero-sized marker just past stack[]; given to clone() as the stack pointer */
+ int id; /* slot id handed to clone_func() */
+};
+
+/*
+ * Record the caller's pid, as named by the /proc/self symlink, in
+ * processes[current].real.
+ *
+ * Returns 0 on success and -1 if the link cannot be read.
+ */
+static int get_real_pid(void)
+{
+	char buf[11];
+	int ret;
+
+	/* Leave room for the terminator: readlink() does not add one */
+	ret = readlink("/proc/self", buf, sizeof(buf) - 1);
+	if (ret <= 0) {
+		fprintf(stderr, "%d: readlink /proc/self :%m\n", current);
+		return -1;
+	}
+	buf[ret] = '\0';
+
+	processes[current].real = atoi(buf);
+	return 0;
+}
+
+/* Entry point of every cloned process: adopt the slot id, then serve commands. */
+static int clone_func(void *_arg)
+{
+	const struct clone_args *ca = _arg;
+	int failed;
+
+	current = ca->id;
+
+	failed = get_real_pid();
+	if (failed)
+		exit(1);
+
+	printf("%3d: Hello. My pid is %d\n", ca->id, getpid());
+	mainloop();
+	exit(0);
+}
+
+/*
+ * Clone a new process that runs clone_func() for slot @id.
+ *
+ * @flags is OR-ed with SIGCHLD and passed to clone(). The value returned by
+ * clone() (the child pid, or -1 on failure) is stored in processes[id].pid
+ * and returned to the caller.
+ */
+static int make_child(int id, int flags)
+{
+	struct clone_args args;
+	pid_t cid;
+
+	args.id = id;
+
+	/* The stack grows down, so hand clone() the address just past stack[] */
+	cid = clone(clone_func, args.stack_ptr,
+		    flags | SIGCHLD, &args);
+
+	if (cid < 0)
+		fprintf(stderr, "clone(%d, %d) :%m\n", id, flags);
+
+	processes[id].pid = cid;
+
+	return cid;
+}
+
+/*
+ * Mount a fresh procfs on a temporary directory and return a descriptor for
+ * its root. The mount is detached with MNT_DETACH and the directory is
+ * removed before returning.
+ *
+ * Returns the open() result (negative if open() itself failed), or -1 when
+ * creating, mounting, unmounting or removing the directory fails.
+ */
+static int open_proc(void)
+{
+	char mnt[] = "/tmp/.caba_test.proc.XXXXXX";
+	const unsigned long mnt_flags =
+		MS_MGC_VAL | MS_NOSUID | MS_NOEXEC | MS_NODEV;
+	int procfd;
+
+	if (!mkdtemp(mnt)) {
+		fprintf(stderr, "mkdtemp failed %s :%m\n", mnt);
+		return -1;
+	}
+
+	if (mount("proc", mnt, "proc", mnt_flags, NULL) != 0) {
+		fprintf(stderr, "mount proc failed :%m\n");
+		rmdir(mnt);
+		return -1;
+	}
+
+	/* A failed open is only reported here; the mount is torn down anyway */
+	procfd = open(mnt, O_RDONLY | O_DIRECTORY, 0);
+	if (procfd < 0)
+		fprintf(stderr, "can't open proc :%m\n");
+
+	if (umount2(mnt, MNT_DETACH) != 0) {
+		fprintf(stderr, "can't umount proc :%m\n");
+		goto fail;
+	}
+
+	if (rmdir(mnt) != 0) {
+		fprintf(stderr, "can't remove tmp dir :%m\n");
+		goto fail;
+	}
+
+	return procfd;
+fail:
+	if (procfd >= 0)
+		close(procfd);
+	return -1;
+}
+
+/*
+ * Open the pid namespace file of @pid ("<pid>/ns/pid") relative to the
+ * descriptor returned by open_proc().
+ *
+ * Returns the openat() result, or -1 if procfs could not be opened.
+ */
+static int open_pidns(int pid)
+{
+	char ns_path[PATH_MAX];
+	int procfd, nsfd;
+
+	procfd = open_proc();
+	if (procfd < 0) {
+		fprintf(stderr, "open proc\n");
+		return -1;
+	}
+
+	sprintf(ns_path, "%d/ns/pid", pid);
+
+	nsfd = openat(procfd, ns_path, O_RDONLY);
+	if (nsfd == -1)
+		fprintf(stderr, "open pidns fd\n");
+
+	/* The procfs descriptor is only needed for the lookup */
+	close(procfd);
+	return nsfd;
+}
+
+/*
+ * Call setns() on the pid namespace file of @pid, passing @nstype through.
+ *
+ * Returns the setns() result, or -1 if the namespace file cannot be opened.
+ */
+static int setns_pid(int pid, int nstype)
+{
+	int nsfd = open_pidns(pid);
+	int rc;
+
+	if (nsfd < 0)
+		return -1;
+
+	rc = setns(nsfd, nstype);
+	if (rc == -1)
+		fprintf(stderr, "setns :%m\n");
+
+	close(nsfd);
+	return rc;
+}
+
+/*
+ * Read one command from this process's socket, execute it and answer with
+ * an int status.
+ *
+ * The answer is 0 on success and -1 on failure, except for TEST_GETSID,
+ * TEST_GETPGID and TEST_GETPPID, which answer with the queried id.
+ * TEST_DIE exits with code 0 without answering, and a failed TEST_FORK
+ * exits without answering as well. Every failure ends the process with
+ * exit code 1 after shutting the socket down.
+ */
+static void handle_command(void)
+{
+	int sk = processes[current].sks[0], ret, status = 0;
+	struct command cmd;
+
+	ret = read(sk, &cmd, sizeof(cmd));
+	if (ret != sizeof(cmd)) {
+		fprintf(stderr, "Unable to get command :%m\n");
+		goto err;
+	}
+
+	switch (cmd.cmd) {
+	case TEST_FORK:
+	{
+		pid_t pid;
+
+		pid = make_child(cmd.arg1, cmd.arg2);
+		if (pid == -1) {
+			status = -1;
+			goto err;
+		}
+
+		printf("%3d: fork(%d, %x) = %d\n",
+		       current, cmd.arg1, cmd.arg2, pid);
+		processes[cmd.arg1].pid = pid;
+	}
+		break;
+	case TEST_WAIT:
+		printf("%3d: wait(%d) = %d\n", current,
+		       cmd.arg1, processes[cmd.arg1].pid);
+
+		if (waitpid(processes[cmd.arg1].pid, NULL, 0) == -1) {
+			fprintf(stderr, "waitpid(%d) :%m\n", processes[cmd.arg1].pid);
+			status = -1;
+		}
+		break;
+	case TEST_SUBREAPER:
+		printf("%3d: subreaper(%d)\n", current, cmd.arg1);
+		if (prctl(PR_SET_CHILD_SUBREAPER, cmd.arg1, 0, 0, 0) == -1) {
+			fprintf(stderr, "PR_SET_CHILD_SUBREAPER :%m\n");
+			status = -1;
+		}
+		break;
+	case TEST_SETSID:
+		printf("%3d: setsid()\n", current);
+		if (setsid() == -1) {
+			fprintf(stderr, "setsid :%m\n");
+			status = -1;
+		}
+		break;
+	case TEST_GETSID:
+		printf("%3d: getsid()\n", current);
+		status = getsid(getpid());
+		if (status == -1)
+			fprintf(stderr, "getsid :%m\n");
+		break;
+	case TEST_SETPGID:
+		printf("%3d: setpgid(%d, %d)\n", current, cmd.arg1, cmd.arg2);
+		if (setpgid(processes[cmd.arg1].pid, processes[cmd.arg2].pid) == -1) {
+			fprintf(stderr, "setpgid :%m\n");
+			status = -1;
+		}
+		break;
+	case TEST_GETPGID:
+		printf("%3d: getpgid()\n", current);
+		status = getpgid(0);
+		if (status == -1)
+			fprintf(stderr, "getpgid :%m\n");
+		break;
+	case TEST_GETPPID:
+		printf("%3d: getppid()\n", current);
+		status = getppid();
+		if (status == -1)
+			fprintf(stderr, "getppid :%m\n");
+		break;
+	case TEST_SETNS:
+		printf("%3d: setns(%d, %d) = %d\n", current,
+		       cmd.arg1, cmd.arg2, processes[cmd.arg1].pid);
+		/* Report a failed setns() instead of answering success */
+		if (setns_pid(processes[cmd.arg1].pid, cmd.arg2) == -1)
+			status = -1;
+		break;
+	case TEST_DIE:
+		printf("%3d: die()\n", current);
+		processes[current].dead = 1;
+		shutdown(sk, SHUT_RDWR);
+		exit(0);
+	default:
+		/* Never acknowledge a request that was not understood */
+		fprintf(stderr, "Unknown command %d\n", cmd.cmd);
+		status = -1;
+		break;
+	}
+
+	ret = write(sk, &status, sizeof(status));
+	if (ret != sizeof(status)) {
+		fprintf(stderr, "Unable to answer :%m\n");
+		goto err;
+	}
+
+	/* The failure has been reported to the sender; now give up */
+	if (status < 0)
+		goto err;
+
+	return;
+err:
+	shutdown(sk, SHUT_RDWR);
+	exit(1);
+}
+
+/*
+ * Send command @op with @arg1/@arg2 to process @id and wait for its answer.
+ *
+ * Returns the answered value for TEST_GETSID, TEST_GETPGID and TEST_GETPPID,
+ * 0 for any other successful command, and -1 when TEST_FORK targets a slot
+ * that already has a pid. On any other failure cleanup() is called and the
+ * caller exits with code 1.
+ */
+static int send_command(int id, enum commands op, int arg1, int arg2)
+{
+	struct command request = {op, arg1, arg2};
+	int sk = processes[id].sks[1];
+	int answer = 0;
+	bool is_query;
+	int ret;
+
+	if (op == TEST_FORK && processes[arg1].pid) {
+		fprintf(stderr, "%d is busy :%m\n", arg1);
+		return -1;
+	}
+
+	ret = write(sk, &request, sizeof(request));
+	if (ret != sizeof(request)) {
+		fprintf(stderr, "Unable to send command :%m\n");
+		goto fail;
+	}
+
+	/* TEST_DIE is never answered, so a short read with answer 0 is fine */
+	ret = read(sk, &answer, sizeof(answer));
+	if (ret != sizeof(answer) && (answer != 0 || op != TEST_DIE)) {
+		fprintf(stderr, "Unable to get answer :%m\n");
+		goto fail;
+	}
+
+	is_query = op == TEST_GETSID || op == TEST_GETPGID || op == TEST_GETPPID;
+	if (is_query && answer != -1)
+		return answer;
+
+	if (answer != 0) {
+		fprintf(stderr, "The command(%d, %d, %d) failed :%m\n", op, arg1, arg2);
+		goto fail;
+	}
+
+	return 0;
+fail:
+	cleanup();
+	exit(1);
+}
+
+/*
+ * Read the "NScaba:" field from /proc/<pid>/status (/proc/self/status when
+ * @pid is 0) and store its first integer in *@caba.
+ *
+ * Returns 0 on success and -1 if the file cannot be opened, the field is
+ * missing or its value cannot be parsed.
+ */
+static int get_caba(int pid, int *caba)
+{
+	char path[64], *line = NULL;
+	size_t cap = 0;	/* getline() allocates the buffer; start it out empty */
+	int ret = -1;
+	FILE *fp;
+
+	if (!pid)
+		snprintf(path, sizeof(path), "/proc/self/status");
+	else
+		snprintf(path, sizeof(path), "/proc/%d/status", pid);
+
+	fp = fopen(path, "r");
+	if (!fp) {
+		perror("fopen");
+		return -1;
+	}
+
+	while (getline(&line, &cap, fp) != -1) {
+		if (strncmp(line, "NScaba:", 7) != 0)
+			continue;
+
+		if (line[7] == '\0') {
+			*caba = 0;
+			ret = 0;
+		} else if (sscanf(line + 7, "%d", caba) == 1) {
+			ret = 0;
+		} else {
+			/* A failed match does not set errno, so perror() would mislead */
+			fprintf(stderr, "can't parse NScaba value\n");
+		}
+		break;
+	}
+
+	/* Single exit path: the line buffer and the stream are always released */
+	free(line);
+	fclose(fp);
+	return ret;
+}
+
+/* CABA counts as supported when get_caba() succeeds for the calling process. */
+static bool caba_supported(void)
+{
+	int unused;
+
+	if (get_caba(0, &unused) != 0)
+		return false;
+	return true;
+}
+
+FIXTURE(caba) {
+};
+
+/* Skip early when the kernel does not expose the NScaba field. */
+FIXTURE_SETUP(caba)
+{
+	/* caba_supported() returns bool, so there is no error value to assert on */
+	if (!caba_supported())
+		SKIP(return, "CABA is not supported");
+}
+
+/* Kill the helper processes, provided the test got far enough to map the table. */
+FIXTURE_TEARDOWN(caba)
+{
+	/* caba_supported() returns bool, so there is no error value to assert on */
+	if (!caba_supported())
+		SKIP(return, "CABA is not supported");
+
+	/* cleanup() dereferences the table, which only the test body maps */
+	if (processes && processes != MAP_FAILED)
+		cleanup();
+}
+
+/*
+ * Build a process tree in which two session leaders are both children of
+ * the pid namespace init and each has a child from the other session, then
+ * check the NScaba value reported for the surviving processes.
+ */
+TEST_F(caba, complex_sessions)
+{
+	int ret, i, pid, caba;
+
+	/* caba_supported() returns bool, so there is no error value to assert on */
+	if (!caba_supported())
+		SKIP(return, "CABA is not supported");
+
+	/* Shared mapping, so pids stored by the helpers are visible here too */
+	processes = mmap(NULL, PAGE_SIZE,
+			 PROT_WRITE | PROT_READ, MAP_SHARED | MAP_ANONYMOUS,
+			 -1, 0);
+	ASSERT_NE(processes, MAP_FAILED);
+	for (i = 0; i < nr_processes; i++) {
+		ret = socketpair(PF_UNIX, SOCK_STREAM, 0, processes[i].sks);
+		ASSERT_EQ(ret, 0);
+	}
+
+	/*
+	 * Create init:
+	 * (pid, sid)
+	 * (1, 1)
+	 */
+	pid = make_child(0, 0); ASSERT_GT(pid, 0);
+	ret = send_command(0, TEST_FORK, 1, CLONE_NEWPID);
+	ASSERT_EQ(ret, 0);
+	ret = send_command(1, TEST_SETSID, 0, 0);
+	ASSERT_EQ(ret, 0);
+
+	/*
+	 * Create sequence of processes from one session:
+	 * (pid, sid)
+	 * (1, 1)---(2, 2)---(3, 2)---(4, 2)---(5, 2)
+	 */
+	ret = send_command(1, TEST_FORK, 2, 0); ASSERT_EQ(ret, 0);
+	ret = send_command(2, TEST_SETSID, 0, 0); ASSERT_EQ(ret, 0);
+	ret = send_command(2, TEST_FORK, 3, 0); ASSERT_EQ(ret, 0);
+	ret = send_command(3, TEST_FORK, 4, 0); ASSERT_EQ(ret, 0);
+	ret = send_command(4, TEST_FORK, 5, 0); ASSERT_EQ(ret, 0);
+	/*
+	 * Create another session in the middle of first one:
+	 * (pid, sid)
+	 * (1, 1)---(2, 2)---(3, 2)---(4, 4)-+-(5, 2)
+	 *                                   `-(6, 4)---(7, 4)
+	 */
+	ret = send_command(4, TEST_SETSID, 0, 0); ASSERT_EQ(ret, 0);
+	ret = send_command(4, TEST_FORK, 6, 0); ASSERT_EQ(ret, 0);
+	ret = send_command(6, TEST_FORK, 7, 0); ASSERT_EQ(ret, 0);
+
+	/*
+	 * Kill 6 while having 2 as child-sub-reaper:
+	 * (pid, sid)
+	 * (1, 1)---(2, 2)---(3, 2)---(4, 4)-+-(5, 2)
+	 *                                   `-(7, 4)
+	 */
+	ret = send_command(2, TEST_SUBREAPER, 1, 0); ASSERT_EQ(ret, 0);
+	ret = send_command(6, TEST_DIE, 0, 0); ASSERT_EQ(ret, 0);
+	ret = send_command(4, TEST_WAIT, 6, 0); ASSERT_EQ(ret, 0);
+	ret = send_command(2, TEST_SUBREAPER, 0, 0); ASSERT_EQ(ret, 0);
+
+	/*
+	 * Kill 3:
+	 * (pid, sid)
+	 * (1, 1)-+-(2, 2)---(7, 4)
+	 *        `-(4, 4)---(5, 2)
+	 * note: This is a "tricky" session tree example where it's not obvious
+	 * whether sid 2 was created first or sid 4 when creating the tree.
+	 */
+	ret = send_command(3, TEST_DIE, 0, 0); ASSERT_EQ(ret, 0);
+	ret = send_command(2, TEST_WAIT, 3, 0); ASSERT_EQ(ret, 0);
+
+	/*
+	 * CABA tree for this would be:
+	 * (pid, sid)
+	 * (1, 1)---(2, 2)---(4, 4)-+-(5, 2)
+	 *                          `-(7, 4)
+	 * note: CABA allows us to understand that session 2 was created first.
+	 */
+	ret = get_caba(processes[2].real, &caba);
+	ASSERT_EQ(ret, 0); ASSERT_EQ(caba, processes[1].real);
+	ret = get_caba(processes[4].real, &caba);
+	ASSERT_EQ(ret, 0); ASSERT_EQ(caba, processes[2].real);
+	ret = get_caba(processes[5].real, &caba);
+	ASSERT_EQ(ret, 0); ASSERT_EQ(caba, processes[4].real);
+	ret = get_caba(processes[7].real, &caba);
+	ASSERT_EQ(ret, 0); ASSERT_EQ(caba, processes[4].real);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/caba/config b/tools/testing/selftests/caba/config
new file mode 100644
index 000000000000..eae7bdaa3790
--- /dev/null
+++ b/tools/testing/selftests/caba/config
@@ -0,0 +1 @@
+CONFIG_PID_NS=y
--
2.37.1