Re: [PATCH v14 01/13] sk_run_filter: add BPF_S_ANC_SECCOMP_LD_W
From: Will Drewry
Date: Tue Mar 13 2012 - 11:43:29 EST
On Tue, Mar 13, 2012 at 5:04 AM, Indan Zupancic <indan@xxxxxx> wrote:
> Hello,
>
> I made a quick pseudo-patch for BPF JIT support. As far as I can tell,
> the actual code itself is very simple, just:
Awesome - yet another reason this approach is nicer. When I'm done
working up v15, I'll pull in this patch and see what explodes and/or
runs really fast.
cheers!
will
> case BPF_S_ANC_SECCOMP_LD_W:
> /* SECCOMP doesn't use SKB, no need to preserve %rdi. */
> t_offset = seccomp_bpf_load - (image + addrs[i]);
> EMIT1_off32(0xbf, K); /* mov imm32,%rdi */
> EMIT1_off32(0xe8, t_offset); /* call */
> break;
>
> EAX is set directly as it's the return register, EBX is preserved by the
> callee, RDI and other registers are unused by seccomp, so no need for
> trampoline code AFAIK.
>
> The rest of the patch just makes the JIT code suitable for sharing.
> The only real change is that, after this patch, the unused insns memory is freed.
>
> The code is untested and even uncompiled, as I only have access to my 32-bit
> laptop at the moment.
>
> It would be interesting to know whether this actually works and what the
> performance difference is for seccomp.
>
> Greetings,
>
> Indan
>
>
> arch/x86/net/bpf_jit_comp.c | 47 ++++++++++++++++++++----------------------
> include/linux/filter.h | 14 +++++++-----
> net/core/filter.c | 27 ++++++++++++++++++++++--
> 3 files changed, 54 insertions(+), 34 deletions(-)
>
> ---
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 7c1b765..3cd6626 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -118,7 +118,7 @@ static inline void bpf_flush_icache(void *start, void *end)
> }
>
>
> -void bpf_jit_compile(struct sk_filter *fp)
> +bpf_func_t bpf_jit_compile(const struct sock_filter* filter, int flen, int use_skb)
> {
> u8 temp[64];
> u8 *prog;
> @@ -131,15 +131,13 @@ void bpf_jit_compile(struct sk_filter *fp)
> int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */
> unsigned int cleanup_addr; /* epilogue code offset */
> unsigned int *addrs;
> - const struct sock_filter *filter = fp->insns;
> - int flen = fp->len;
>
> if (!bpf_jit_enable)
> - return;
> + return NULL;
>
> addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
> if (addrs == NULL)
> - return;
> + return NULL;
>
> /* Before first pass, make a rough estimation of addrs[]
> * each bpf instruction is translated to less than 64 bytes
> @@ -151,11 +149,16 @@ void bpf_jit_compile(struct sk_filter *fp)
> cleanup_addr = proglen; /* epilogue address */
>
> for (pass = 0; pass < 10; pass++) {
> - u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
> + u8 seen_or_pass0 = seen;
> /* no prologue/epilogue for trivial filters (RET something) */
> proglen = 0;
> prog = temp;
>
> + if (pass == 0) {
> + seen_or_pass0 = SEEN_XREG | SEEN_MEM;
> + if (use_skb)
> + seen_or_pass0 |= SEEN_DATAREF;
> + }
> if (seen_or_pass0) {
> EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
> EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */
> @@ -472,6 +475,14 @@ void bpf_jit_compile(struct sk_filter *fp)
> CLEAR_A();
> #endif
> break;
> +#ifdef CONFIG_SECCOMP_FILTER
> + case BPF_S_ANC_SECCOMP_LD_W:
> + /* SECCOMP doesn't use SKB, no need to preserve %rdi. */
> + t_offset = seccomp_bpf_load - (image + addrs[i]);
> + EMIT1_off32(0xbf, K); /* mov imm32,%rdi */
> + EMIT1_off32(0xe8, t_offset); /* call */
> + break;
> +#endif
> case BPF_S_LD_W_ABS:
> func = sk_load_word;
> common_load: seen |= SEEN_DATAREF;
> @@ -588,13 +599,14 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
> /* hmm, too complex filter, give up with jit compiler */
> goto out;
> }
> + BUG_ON(!use_skb && (seen & SEEN_DATAREF));
> ilen = prog - temp;
> if (image) {
> if (unlikely(proglen + ilen > oldproglen)) {
> pr_err("bpb_jit_compile fatal error\n");
> kfree(addrs);
> module_free(NULL, image);
> - return;
> + return NULL;
> }
> memcpy(image + proglen, temp, ilen);
> }
> @@ -635,28 +647,13 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
> 16, 1, image, proglen, false);
>
> bpf_flush_icache(image, image + proglen);
> -
> - fp->bpf_func = (void *)image;
> }
> out:
> kfree(addrs);
> - return;
> + return (void *)image;
> }
>
> -static void jit_free_defer(struct work_struct *arg)
> +void bpf_jit_free(bpf_func_t image)
> {
> - module_free(NULL, arg);
> -}
> -
> -/* run from softirq, we must use a work_struct to call
> - * module_free() from process context
> - */
> -void bpf_jit_free(struct sk_filter *fp)
> -{
> - if (fp->bpf_func != sk_run_filter) {
> - struct work_struct *work = (struct work_struct *)fp->bpf_func;
> -
> - INIT_WORK(work, jit_free_defer);
> - schedule_work(work);
> - }
> + module_free(NULL, image);
> }
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 8eeb205..292ccca 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -135,12 +135,13 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */
> struct sk_buff;
> struct sock;
>
> +typedef unsigned int (*bpf_func_t)(const struct sk_buff*, const struct sock_filter*);
> +
> struct sk_filter
> {
> atomic_t refcnt;
> unsigned int len; /* Number of filter blocks */
> - unsigned int (*bpf_func)(const struct sk_buff *skb,
> - const struct sock_filter *filter);
> + bpf_func_t bpf_func;
> struct rcu_head rcu;
> struct sock_filter insns[0];
> };
> @@ -158,14 +159,15 @@ extern int sk_detach_filter(struct sock *sk);
> extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
>
> #ifdef CONFIG_BPF_JIT
> -extern void bpf_jit_compile(struct sk_filter *fp);
> -extern void bpf_jit_free(struct sk_filter *fp);
> +extern bpf_func_t bpf_jit_compile(const struct sock_filter*, int flen, int use_skb);
> +extern void bpf_jit_free(bpf_func_t);
> #define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
> #else
> -static inline void bpf_jit_compile(struct sk_filter *fp)
> +static inline bpf_func_t bpf_jit_compile(const struct sock_filter*, int flen, int use_skb)
> {
> + return NULL;
> }
> -static inline void bpf_jit_free(struct sk_filter *fp)
> +static inline void bpf_jit_free(bpf_func_t)
> {
> }
> #define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 5dea452..03e3ea3 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -574,6 +574,14 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
> }
> EXPORT_SYMBOL(sk_chk_filter);
>
> +/* run from softirq, we must use a work_struct to call
> + * bpf_jit_free() from process context
> + */
> +static void jit_free_defer(struct work_struct *arg)
> +{
> + bpf_jit_free((bpf_func_t)arg);
> +}
> +
> /**
> * sk_filter_release_rcu - Release a socket filter by rcu_head
> * @rcu: rcu_head that contains the sk_filter to free
> @@ -582,7 +590,12 @@ void sk_filter_release_rcu(struct rcu_head *rcu)
> {
> struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
>
> - bpf_jit_free(fp);
> + if (fp->bpf_func != sk_run_filter) {
> + struct work_struct *work = (struct work_struct *)fp->bpf_func;
> +
> + INIT_WORK(work, jit_free_defer);
> + schedule_work(work);
> + }
> kfree(fp);
> }
> EXPORT_SYMBOL(sk_filter_release_rcu);
> @@ -599,9 +612,10 @@ EXPORT_SYMBOL(sk_filter_release_rcu);
> */
> int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
> {
> - struct sk_filter *fp, *old_fp;
> + struct sk_filter *fp, *old_fp, *newfp;
> unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
> int err;
> + bpf_func_t jit;
>
> /* Make sure new filter is there and in the right amounts. */
> if (fprog->filter == NULL)
> @@ -625,7 +639,14 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
> return err;
> }
>
> - bpf_jit_compile(fp);
> + jit = bpf_jit_compile(fp->insns, fp->len, 1);
> + if (jit) {
> + fp->bpf_func = jit;
> + /* Free unused insns memory */
> + newfp = krealloc(fp, sizeof(*fp), GFP_KERNEL);
> + if (newfp)
> + fp = newfp;
> + }
>
> old_fp = rcu_dereference_protected(sk->sk_filter,
> sock_owned_by_user(sk));
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/