[PATCH 1/6] seccomp: Introduce SECCOMP_PIN_ARCHITECTURE

From: Kees Cook
Date: Wed Sep 23 2020 - 19:33:01 EST


For systems that provide multiple syscall maps based on audit
architectures (e.g. AUDIT_ARCH_X86_64 and AUDIT_ARCH_I386 via
CONFIG_COMPAT) or via syscall masks (e.g. x86_x32), allow a fast way
to pin the process to a specific syscall table, instead of needing
to generate all filters with an architecture check as the first filter
action.

This creates the internal representation that seccomp itself can use
(which is separate from the filters, which need to stay runtime
agnostic). It additionally paves the way for constant-action bitmaps.

Signed-off-by: Kees Cook <keescook@xxxxxxxxxxxx>
---
include/linux/seccomp.h | 9 +++
include/uapi/linux/seccomp.h | 1 +
kernel/seccomp.c | 79 ++++++++++++++++++-
tools/testing/selftests/seccomp/seccomp_bpf.c | 33 ++++++++
4 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 02aef2844c38..0be20bc81ea9 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -20,12 +20,18 @@
#include <linux/atomic.h>
#include <asm/seccomp.h>

+#define SECCOMP_ARCH_IS_NATIVE 1
+#define SECCOMP_ARCH_IS_COMPAT 2
+#define SECCOMP_ARCH_IS_MULTIPLEX 3
+#define SECCOMP_ARCH_IS_UNKNOWN 0xff
+
struct seccomp_filter;
/**
* struct seccomp - the state of a seccomp'ed process
*
* @mode: indicates one of the valid values above for controlled
* system calls available to a process.
+ * @arch: seccomp's internal architecture identifier (not seccomp_data->arch)
* @filter: must always point to a valid seccomp-filter or NULL as it is
* accessed without locking during system call entry.
*
@@ -34,6 +40,9 @@ struct seccomp_filter;
*/
struct seccomp {
int mode;
+#ifdef SECCOMP_ARCH
+ u8 arch;
+#endif
atomic_t filter_count;
struct seccomp_filter *filter;
};
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 6ba18b82a02e..f4d134ebfa7e 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -16,6 +16,7 @@
#define SECCOMP_SET_MODE_FILTER 1
#define SECCOMP_GET_ACTION_AVAIL 2
#define SECCOMP_GET_NOTIF_SIZES 3
+#define SECCOMP_PIN_ARCHITECTURE 4

/* Valid flags for SECCOMP_SET_MODE_FILTER */
#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ae6b40cc39f4..0a3ff8eb8aea 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -298,6 +298,47 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
return 0;
}

+#ifdef SECCOMP_ARCH
+static inline u8 seccomp_get_arch(u32 syscall_arch, u32 syscall_nr)
+{
+ u8 seccomp_arch;
+
+ switch (syscall_arch) {
+ case SECCOMP_ARCH:
+ seccomp_arch = SECCOMP_ARCH_IS_NATIVE;
+ break;
+#ifdef CONFIG_COMPAT
+ case SECCOMP_ARCH_COMPAT:
+ seccomp_arch = SECCOMP_ARCH_IS_COMPAT;
+ break;
+#endif
+ default:
+ seccomp_arch = SECCOMP_ARCH_IS_UNKNOWN;
+ }
+
+#ifdef SECCOMP_MULTIPLEXED_SYSCALL_TABLE_ARCH
+ /* Multiplexed tables are told apart by masked bits of the syscall nr. */
+ if (syscall_arch == SECCOMP_MULTIPLEXED_SYSCALL_TABLE_ARCH)
+ seccomp_arch |= (syscall_nr & SECCOMP_MULTIPLEXED_SYSCALL_TABLE_MASK) >>
+ SECCOMP_MULTIPLEXED_SYSCALL_TABLE_SHIFT;
+#endif
+
+ return seccomp_arch;
+}
+#endif
+
+static inline bool seccomp_arch_mismatch(struct seccomp *seccomp,
+ const struct seccomp_data *sd)
+{
+#ifdef SECCOMP_ARCH
+ /* Once pinned (arch != 0), a syscall from any other arch is a mismatch. */
+ if (seccomp->arch && seccomp->arch != seccomp_get_arch(sd->arch, sd->nr))
+ return true;
+#endif
+
+ return false;
+}
+
/**
* seccomp_run_filters - evaluates all seccomp filters against @sd
* @sd: optional seccomp data to be passed to filters
@@ -312,9 +353,14 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
struct seccomp_filter **match)
{
u32 ret = SECCOMP_RET_ALLOW;
+ struct seccomp_filter *f;
+ struct seccomp *seccomp = &current->seccomp;
+
+ if (seccomp_arch_mismatch(seccomp, sd))
+ return SECCOMP_RET_KILL_PROCESS;
+
/* Make sure cross-thread synced filter points somewhere sane. */
- struct seccomp_filter *f =
- READ_ONCE(current->seccomp.filter);
+ f = READ_ONCE(seccomp->filter);

/* Ensure unexpected behavior doesn't result in failing open. */
if (WARN_ON(f == NULL))
@@ -522,6 +568,11 @@ static inline void seccomp_sync_threads(unsigned long flags)
if (task_no_new_privs(caller))
task_set_no_new_privs(thread);

+#ifdef SECCOMP_ARCH
+ /* Copy any pinned architecture. */
+ thread->seccomp.arch = caller->seccomp.arch;
+#endif
+
/*
* Opt the other thread into seccomp if needed.
* As threads are considered to be trust-realm
@@ -1652,6 +1703,23 @@ static long seccomp_get_notif_sizes(void __user *usizes)
return 0;
}

+static long seccomp_pin_architecture(void)
+{
+#ifdef SECCOMP_ARCH
+ struct task_struct *task = current;
+
+ u8 arch = seccomp_get_arch(syscall_get_arch(task),
+ syscall_get_nr(task, task_pt_regs(task)));
+
+ /* Already pinned to a different arch: refuse to re-pin. */
+ if (task->seccomp.arch && task->seccomp.arch != arch)
+ return -EBUSY;
+
+ task->seccomp.arch = arch;
+#endif
+ return 0;
+}
+
/* Common entry point for both prctl and syscall. */
static long do_seccomp(unsigned int op, unsigned int flags,
void __user *uargs)
@@ -1673,6 +1741,13 @@ static long do_seccomp(unsigned int op, unsigned int flags,
return -EINVAL;

return seccomp_get_notif_sizes(uargs);
+ case SECCOMP_PIN_ARCHITECTURE:
+ if (flags != 0)
+ return -EINVAL;
+ if (uargs != NULL)
+ return -EINVAL;
+
+ return seccomp_pin_architecture();
default:
return -EINVAL;
}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 9c398768553b..d90551e0385e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -157,6 +157,10 @@ struct seccomp_data {
#define SECCOMP_GET_NOTIF_SIZES 3
#endif

+#ifndef SECCOMP_PIN_ARCHITECTURE
+#define SECCOMP_PIN_ARCHITECTURE 4
+#endif
+
#ifndef SECCOMP_FILTER_FLAG_TSYNC
#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
#endif
@@ -2221,6 +2225,35 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
EXPECT_NE(self->mypid, syscall(__NR_getpid));
}

+TEST(seccomp_architecture_pin)
+{
+ long ret;
+
+ ret = seccomp(SECCOMP_PIN_ARCHITECTURE, 0, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_PIN_ARCHITECTURE!");
+ }
+
+ /* Non-zero flags and/or a non-NULL argument must fail with EINVAL. */
+ ret = seccomp(SECCOMP_PIN_ARCHITECTURE, 1, NULL);
+ ASSERT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject SECCOMP_PIN_ARCHITECTURE with flags!");
+ }
+
+ ret = seccomp(SECCOMP_PIN_ARCHITECTURE, 0, &ret);
+ ASSERT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject SECCOMP_PIN_ARCHITECTURE with address!");
+ }
+
+ ret = seccomp(SECCOMP_PIN_ARCHITECTURE, 1, &ret);
+ ASSERT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject SECCOMP_PIN_ARCHITECTURE with flags and address!");
+ }
+}
+
TEST(seccomp_syscall)
{
struct sock_filter filter[] = {
--
2.25.1