[PATCH seccomp 6/6] seccomp/cache: Report cache data through /proc/pid/seccomp_cache
From: YiFei Zhu
Date: Thu Sep 24 2020 - 08:07:22 EST
From: YiFei Zhu <yifeifz2@xxxxxxxxxxxx>
Currently the kernel does not provide an infrastructure to translate
architecture numbers to a human-readable name. Translating syscall
numbers to syscall names is possible through FTRACE_SYSCALL
infrastructure but it does not provide support for compat syscalls.
This will create a file for each PID as /proc/pid/seccomp_cache.
The file will be empty when no seccomp filters are loaded, or be
in the format of:
<hex arch number> <decimal syscall number> <ALLOW | FILTER>
where ALLOW means the cache is guaranteed to allow the syscall,
and filter means the cache will pass the syscall to the BPF filter.
For the docker default profile on x86_64 it looks like:
c000003e 0 ALLOW
c000003e 1 ALLOW
c000003e 2 ALLOW
c000003e 3 ALLOW
[...]
c000003e 132 ALLOW
c000003e 133 ALLOW
c000003e 134 FILTER
c000003e 135 FILTER
c000003e 136 FILTER
c000003e 137 ALLOW
c000003e 138 ALLOW
c000003e 139 FILTER
c000003e 140 ALLOW
c000003e 141 ALLOW
[...]
This file is guarded by CONFIG_PROC_SECCOMP_CACHE with a default
of N because I think certain users of seecomp might not want the
application to know which syscalls are definitely usable.
I'm not sure if adding all the "human readable names" is worthwhile,
considering it can be easily done in userspace.
Suggested-by: Jann Horn <jannh@xxxxxxxxxx>
Link: https://lore.kernel.org/lkml/CAG48ez3Ofqp4crXGksLmZY6=fGrF_tWyUCg7PBkAetvbbOPeOA@xxxxxxxxxxxxxx/
Signed-off-by: YiFei Zhu <yifeifz2@xxxxxxxxxxxx>
---
arch/Kconfig | 10 ++++++++++
fs/proc/base.c | 7 +++++--
include/linux/seccomp.h | 5 +++++
kernel/seccomp.c | 26 ++++++++++++++++++++++++++
4 files changed, 46 insertions(+), 2 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 8cc3dc87f253..dbfd897e5dc0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -514,6 +514,16 @@ config SECCOMP_CACHE_NR_ONLY
endchoice
+config PROC_SECCOMP_CACHE
+ bool "Show seccomp filter cache status in /proc/pid/seccomp_cache"
+ depends on SECCOMP_CACHE_NR_ONLY
+ depends on PROC_FS
+ help
+ This is enables /proc/pid/seccomp_cache interface to monitor
+ seccomp cache data. The file format is subject to change.
+
+ If unsure, say N.
+
config HAVE_ARCH_STACKLEAK
bool
help
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 617db4e0faa0..2af626f69fa1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2615,7 +2615,7 @@ static struct dentry *proc_pident_instantiate(struct dentry *dentry,
return d_splice_alias(inode, dentry);
}
-static struct dentry *proc_pident_lookup(struct inode *dir,
+static struct dentry *proc_pident_lookup(struct inode *dir,
struct dentry *dentry,
const struct pid_entry *p,
const struct pid_entry *end)
@@ -2815,7 +2815,7 @@ static const struct pid_entry attr_dir_stuff[] = {
static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
{
- return proc_pident_readdir(file, ctx,
+ return proc_pident_readdir(file, ctx,
attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
}
@@ -3258,6 +3258,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_PROC_PID_ARCH_STATUS
ONE("arch_status", S_IRUGO, proc_pid_arch_status),
#endif
+#ifdef CONFIG_PROC_SECCOMP_CACHE
+ ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 02aef2844c38..3cedec824365 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -121,4 +121,9 @@ static inline long seccomp_get_metadata(struct task_struct *task,
return -EINVAL;
}
#endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */
+
+#ifdef CONFIG_PROC_SECCOMP_CACHE
+int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task);
+#endif
#endif /* _LINUX_SECCOMP_H */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5b1bd8329e9c..c5697d9483ae 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -2295,3 +2295,29 @@ static int __init seccomp_sysctl_init(void)
device_initcall(seccomp_sysctl_init)
#endif /* CONFIG_SYSCTL */
+
+#ifdef CONFIG_PROC_SECCOMP_CACHE
+/* Currently CONFIG_PROC_SECCOMP_CACHE implies CONFIG_SECCOMP_CACHE_NR_ONLY */
+int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ struct seccomp_filter *f = READ_ONCE(task->seccomp.filter);
+ int arch, nr;
+
+ if (!f)
+ return 0;
+
+ for (arch = 0; arch < ARRAY_SIZE(syscall_arches); arch++) {
+ for (nr = 0; nr < NR_syscalls; nr++) {
+ bool cached = test_bit(nr, f->cache.syscall_ok[arch]);
+ char *status = cached ? "ALLOW" : "FILTER";
+
+ seq_printf(m, "%08x %d %s\n", syscall_arches[arch],
+ nr, status
+ );
+ }
+ }
+
+ return 0;
+}
+#endif /* CONFIG_PROC_SECCOMP_CACHE */
--
2.28.0