Re: [PATCH 3/7] seccomp_filter: Enable ftrace-based system callfiltering
From: Steven Rostedt
Date: Thu Apr 28 2011 - 09:50:39 EST
On Wed, 2011-04-27 at 22:08 -0500, Will Drewry wrote:
> -typedef struct { int mode; } seccomp_t;
> +/**
> + * struct seccomp_state - the state of a seccomp'ed process
> + *
> + * @mode:
> + * if this is 1, the process is under standard seccomp rules
> + * is 2, the process is only allowed to make system calls where
> + * the corresponding bit is set in bitmask and any
> + * associated filters evaluate successfully.
I hate arbitrary numbers. This is not a big deal, but can we create an
enum or define that puts names for the modes.
SECCOMP_MODE_BASIC
SECCOMP_MODE_FILTER
??
> + * @usage: number of references to the current instance.
> + * @bitmask: a mask of allowed or filtered system calls and additional flags.
> + * @filter_count: number of seccomp filters in @filters.
> + * @filters: list of seccomp_filter entries for system calls.
> + */
> +struct seccomp_state {
> + uint16_t mode;
> + atomic_t usage;
> + DECLARE_BITMAP(bitmask, NR_syscalls + 1);
> + int filter_count;
> + struct list_head filters;
> +};
> +
> +typedef struct { struct seccomp_state *state; } seccomp_t;
>
> extern void __secure_computing(int);
> static inline void secure_computing(int this_syscall)
> @@ -16,8 +39,14 @@ static inline void secure_computing(int this_syscall)
> __secure_computing(this_syscall);
> }
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index e7548de..bdcf70b 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -34,6 +34,7 @@
> #include <linux/cgroup.h>
> #include <linux/security.h>
> #include <linux/hugetlb.h>
> +#include <linux/seccomp.h>
> #include <linux/swap.h>
> #include <linux/syscalls.h>
> #include <linux/jiffies.h>
> @@ -169,6 +170,9 @@ void free_task(struct task_struct *tsk)
> free_thread_info(tsk->stack);
> rt_mutex_debug_task_free(tsk);
> ftrace_graph_exit_task(tsk);
> +#ifdef CONFIG_SECCOMP
> + put_seccomp_state(tsk->seccomp.state);
> +#endif
> free_task_struct(tsk);
> }
> EXPORT_SYMBOL(free_task);
> @@ -280,6 +284,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
> if (err)
> goto out;
>
> +#ifdef CONFIG_SECCOMP
> + tsk->seccomp.state = get_seccomp_state(orig->seccomp.state);
> +#endif
I wonder if we could macro these to get rid of the #ifdefs in fork.c.
#define put_seccomp_state(state) do { } while (0)
#define assign_seccomp_state(src, state) do { } while (0)
??
> +
> setup_thread_stack(tsk, orig);
> clear_user_return_notifier(tsk);
> clear_tsk_need_resched(tsk);
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index 57d4b13..1bee87c 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -8,10 +8,11 @@
>
> #include <linux/seccomp.h>
> #include <linux/sched.h>
> +#include <linux/slab.h>
> #include <linux/compat.h>
>
> /* #define SECCOMP_DEBUG 1 */
> -#define NR_SECCOMP_MODES 1
> +#define NR_SECCOMP_MODES 2
>
> /*
> * Secure computing mode 1 allows only read/write/exit/sigreturn.
> @@ -32,9 +33,11 @@ static int mode1_syscalls_32[] = {
>
> void __secure_computing(int this_syscall)
> {
> - int mode = current->seccomp.mode;
> + int mode = -1;
> int * syscall;
> -
> + /* Do we need an RCU read lock to access current's state? */
I'm actually confused to why you are using RCU. What are you protecting.
Currently, I see the state is always accessed from current->seccomp. But
current should not be fighting with itself.
Maybe I'm missing something.
-- Steve
> + if (current->seccomp.state)
> + mode = current->seccomp.state->mode;
> switch (mode) {
> case 1:
> syscall = mode1_syscalls;
> @@ -47,6 +50,16 @@ void __secure_computing(int this_syscall)
> return;
> } while (*++syscall);
> break;
> + case 2:
> +#ifdef CONFIG_COMPAT
> + if (is_compat_task())
> + /* XXX: No compat support yet. */
> + break;
> +#endif
> + if (!seccomp_test_filters(current->seccomp.state,
> + this_syscall))
> + return;
> + break;
> default:
> BUG();
> }
> @@ -57,30 +70,139 @@ void __secure_computing(int this_syscall)
> do_exit(SIGKILL);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/