[PATCH 3/7] seccomp: Allow arch code to provide seccomp_data

From: Andy Lutomirski
Date: Tue Jul 15 2014 - 15:36:45 EST


populate_seccomp_data is expensive: it works by inspecting
task_pt_regs and various other bits to piece together all the
information, and it does so in multiple partially redundant steps.

Arch-specific code in the syscall entry path can do much better.

Admittedly this adds a bit of additional room for error, but the
speedup should be worth it.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
---
include/linux/seccomp.h | 2 +-
kernel/seccomp.c | 32 +++++++++++++++++++-------------
2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 8345fdc..4fc7a84 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -37,7 +37,7 @@ static inline int secure_computing(void)
#define SECCOMP_PHASE1_OK 0
#define SECCOMP_PHASE1_SKIP 1

-extern u32 seccomp_phase1(void);
+extern u32 seccomp_phase1(struct seccomp_data *sd);
int seccomp_phase2(u32 phase1_result);
#else
extern void secure_computing_strict(int this_syscall);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d737445..391f6c4 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -171,24 +171,27 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
*
* Returns valid seccomp BPF response codes.
*/
-static u32 seccomp_run_filters(void)
+static u32 seccomp_run_filters(struct seccomp_data *sd)
{
struct seccomp_filter *f;
- struct seccomp_data sd;
+ struct seccomp_data sd_local;
u32 ret = SECCOMP_RET_ALLOW;

/* Ensure unexpected behavior doesn't result in failing open. */
if (WARN_ON(current->seccomp.filter == NULL))
return SECCOMP_RET_KILL;

- populate_seccomp_data(&sd);
+ if (!sd) {
+ populate_seccomp_data(&sd_local);
+ sd = &sd_local;
+ }

/*
* All filters in the list are evaluated and the lowest BPF return
* value always takes priority (ignoring the DATA).
*/
for (f = current->seccomp.filter; f; f = f->prev) {
- u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
+ u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)sd);

if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
ret = cur_ret;
@@ -415,7 +418,7 @@ void secure_computing_strict(int this_syscall)
#else
int __secure_computing(void)
{
- u32 phase1_result = seccomp_phase1();
+ u32 phase1_result = seccomp_phase1(NULL);

if (likely(phase1_result == SECCOMP_PHASE1_OK))
return 0;
@@ -426,22 +429,22 @@ int __secure_computing(void)
}

#ifdef CONFIG_SECCOMP_FILTER
-static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs)
+static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd)
{
- u32 filter_ret = seccomp_run_filters();
+ u32 filter_ret = seccomp_run_filters(sd);
int data = filter_ret & SECCOMP_RET_DATA;
u32 action = filter_ret & SECCOMP_RET_ACTION;

switch (action) {
case SECCOMP_RET_ERRNO:
/* Set the low-order 16-bits as a errno. */
- syscall_set_return_value(current, regs,
+ syscall_set_return_value(current, task_pt_regs(current),
-data, 0);
goto skip;

case SECCOMP_RET_TRAP:
/* Show the handler the original registers. */
- syscall_rollback(current, regs);
+ syscall_rollback(current, task_pt_regs(current));
/* Let the filter pass back 16 bits of data. */
seccomp_send_sigsys(this_syscall, data);
goto skip;
@@ -468,11 +471,14 @@ skip:

/**
* seccomp_phase1() - run fast path seccomp checks on the current syscall
+ * @sd: The seccomp_data or NULL
*
* This only reads pt_regs via the syscall_xyz helpers. The only change
* it will make to pt_regs is via syscall_set_return_value, and it will
* only do that if it returns SECCOMP_PHASE1_SKIP.
*
+ * If sd is provided, it will not read pt_regs at all.
+ *
* It may also call do_exit or force a signal; these actions must be
* safe.
*
@@ -486,11 +492,11 @@ skip:
* If it returns anything else, then the return value should be passed
* to seccomp_phase2 from a context in which ptrace hooks are safe.
*/
-u32 seccomp_phase1(void)
+u32 seccomp_phase1(struct seccomp_data *sd)
{
int mode = current->seccomp.mode;
- struct pt_regs *regs = task_pt_regs(current);
- int this_syscall = syscall_get_nr(current, regs);
+ int this_syscall = sd ? sd->nr :
+ syscall_get_nr(current, task_pt_regs(current));

switch (mode) {
case SECCOMP_MODE_STRICT:
@@ -498,7 +504,7 @@ u32 seccomp_phase1(void)
return SECCOMP_PHASE1_OK;
#ifdef CONFIG_SECCOMP_FILTER
case SECCOMP_MODE_FILTER:
- return __seccomp_phase1_filter(this_syscall, regs);
+ return __seccomp_phase1_filter(this_syscall, sd);
#endif
default:
BUG();
--
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/