Re: [PATCH 3/7] seccomp_filter: Enable ftrace-based system call filtering

From: Will Drewry
Date: Thu Apr 28 2011 - 11:31:03 EST


On Thu, Apr 28, 2011 at 8:50 AM, Steven Rostedt <rostedt@xxxxxxxxxxx> wrote:
> On Wed, 2011-04-27 at 22:08 -0500, Will Drewry wrote:
>
>> -typedef struct { int mode; } seccomp_t;
>> +/**
>> + * struct seccomp_state - the state of a seccomp'ed process
>> + *
>> + * @mode:
>> + *     if this is 1, the process is under standard seccomp rules
>> + *             is 2, the process is only allowed to make system calls where
>> + *                   the corresponding bit is set in bitmask and any
>> + *                   associated filters evaluate successfully.
>
> I hate arbitrary numbers. This is not a big deal, but can we create an
> enum or define that puts names for the modes.
>
> SECCOMP_MODE_BASIC
> SECCOMP_MODE_FILTER
>
> ??

Works for me.

>> + * @usage: number of references to the current instance.
>> + * @bitmask: a mask of allowed or filtered system calls and additional flags.
>> + * @filter_count: number of seccomp filters in @filters.
>> + * @filters: list of seccomp_filter entries for  system calls.
>> + */
>> +struct seccomp_state {
>> +     uint16_t mode;
>> +     atomic_t usage;
>> +     DECLARE_BITMAP(bitmask, NR_syscalls + 1);
>> +     int filter_count;
>> +     struct list_head filters;
>> +};
>> +
>> +typedef struct { struct seccomp_state *state; } seccomp_t;
>>
>>  extern void __secure_computing(int);
>>  static inline void secure_computing(int this_syscall)
>> @@ -16,8 +39,14 @@ static inline void secure_computing(int this_syscall)
>>               __secure_computing(this_syscall);
>>  }
>>
>
>
>> diff --git a/kernel/fork.c b/kernel/fork.c
>> index e7548de..bdcf70b 100644
>> --- a/kernel/fork.c
>> +++ b/kernel/fork.c
>> @@ -34,6 +34,7 @@
>>  #include <linux/cgroup.h>
>>  #include <linux/security.h>
>>  #include <linux/hugetlb.h>
>> +#include <linux/seccomp.h>
>>  #include <linux/swap.h>
>>  #include <linux/syscalls.h>
>>  #include <linux/jiffies.h>
>> @@ -169,6 +170,9 @@ void free_task(struct task_struct *tsk)
>>       free_thread_info(tsk->stack);
>>       rt_mutex_debug_task_free(tsk);
>>       ftrace_graph_exit_task(tsk);
>> +#ifdef CONFIG_SECCOMP
>> +     put_seccomp_state(tsk->seccomp.state);
>> +#endif
>>       free_task_struct(tsk);
>>  }
>>  EXPORT_SYMBOL(free_task);
>> @@ -280,6 +284,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
>>       if (err)
>>               goto out;
>>
>> +#ifdef CONFIG_SECCOMP
>> +     tsk->seccomp.state = get_seccomp_state(orig->seccomp.state);
>> +#endif
>
> I wonder if we could macro these to get rid of the #ifdefs in fork.c.
>
> #define put_seccomp_state(state)        do { } while (0)
> #define assign_seccomp_state(src, state) do { } while (0)
>
> ??

Makes sense to me. I'll add it to the next update.

>> +
>>       setup_thread_stack(tsk, orig);
>>       clear_user_return_notifier(tsk);
>>       clear_tsk_need_resched(tsk);
>> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
>> index 57d4b13..1bee87c 100644
>> --- a/kernel/seccomp.c
>> +++ b/kernel/seccomp.c
>> @@ -8,10 +8,11 @@
>>
>>  #include <linux/seccomp.h>
>>  #include <linux/sched.h>
>> +#include <linux/slab.h>
>>  #include <linux/compat.h>
>>
>>  /* #define SECCOMP_DEBUG 1 */
>> -#define NR_SECCOMP_MODES 1
>> +#define NR_SECCOMP_MODES 2
>>
>>  /*
>>   * Secure computing mode 1 allows only read/write/exit/sigreturn.
>> @@ -32,9 +33,11 @@ static int mode1_syscalls_32[] = {
>>
>>  void __secure_computing(int this_syscall)
>>  {
>> -     int mode = current->seccomp.mode;
>> +     int mode = -1;
>>       int * syscall;
>> -
>> +     /* Do we need an RCU read lock to access current's state? */
>
> I'm actually confused to why you are using RCU. What are you protecting.
> Currently, I see the state is always accessed from current->seccomp. But
> current should not be fighting with itself.
>
> Maybe I'm missing something.

I'm sure it's me that's missing something. I believe the seccomp
pointer can be accessed from:
- current
- via /proc/<pid>/seccomp_filter (read-only)

Given those cases, would it make sense to ditch the RCU interface for it?

Thanks!
will

>
>> +     if (current->seccomp.state)
>> +             mode = current->seccomp.state->mode;
>>       switch (mode) {
>>       case 1:
>>               syscall = mode1_syscalls;
>> @@ -47,6 +50,16 @@ void __secure_computing(int this_syscall)
>>                               return;
>>               } while (*++syscall);
>>               break;
>> +     case 2:
>> +#ifdef CONFIG_COMPAT
>> +             if (is_compat_task())
>> +                     /* XXX: No compat support yet. */
>> +                     break;
>> +#endif
>> +             if (!seccomp_test_filters(current->seccomp.state,
>> +                                       this_syscall))
>> +                     return;
>> +             break;
>>       default:
>>               BUG();
>>       }
>> @@ -57,30 +70,139 @@ void __secure_computing(int this_syscall)
>>       do_exit(SIGKILL);
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/