Re: [PATCH net-next V2 2/2] tun: allow to attach ebpf socket filter
From: Willem de Bruijn
Date: Fri Jan 05 2018 - 11:22:12 EST
On Fri, Jan 5, 2018 at 4:54 AM, Jason Wang <jasowang@xxxxxxxxxx> wrote:
> This patch allows userspace to attach eBPF filter to tun. This will
> allow to implement VM dataplane filtering in a more efficient way
> compared to cBPF filter by allowing either qemu or libvirt to
> attach eBPF filter to tun.
>
> Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>
> ---
> drivers/net/tun.c | 39 +++++++++++++++++++++++++++++++++++----
> include/uapi/linux/if_tun.h | 1 +
> 2 files changed, 36 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 0853829..9fc8b70 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -238,6 +238,12 @@ struct tun_struct {
> struct tun_pcpu_stats __percpu *pcpu_stats;
> struct bpf_prog __rcu *xdp_prog;
> struct tun_prog __rcu *steering_prog;
> + struct tun_prog __rcu *filter_prog;
> +};
> +
> +struct veth {
> + __be16 h_vlan_proto;
> + __be16 h_vlan_TCI;
> };
>
> static int tun_napi_receive(struct napi_struct *napi, int budget)
> @@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
> #endif
> }
>
> +static unsigned int run_ebpf_filter(struct tun_struct *tun,
> + struct sk_buff *skb,
> + int len)
> +{
> + struct tun_prog *prog = rcu_dereference(tun->filter_prog);
> +
> + if (prog)
> + len = bpf_prog_run_clear_cb(prog->prog, skb);
> +
> + return len;
> +}
> +
> /* Net device start xmit */
> static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> {
> struct tun_struct *tun = netdev_priv(dev);
> int txq = skb->queue_mapping;
> struct tun_file *tfile;
> + int len = skb->len;
>
> rcu_read_lock();
> tfile = rcu_dereference(tun->tfiles[txq]);
> @@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
> sk_filter(tfile->socket.sk, skb))
> goto drop;
>
> + len = run_ebpf_filter(tun, skb, len);
> +
> + /* Trim extra bytes since we may inster vlan proto & TCI
inster -> insert
> + * in tun_put_user().
> + */
> + if (skb_vlan_tag_present(skb))
> + len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;
No need to test skb_vlan_tag_present() twice.
more importantly, why trim these bytes unconditionally?
This could only cause problems if the filter trims a packet to a length
shorter than the minimum. sk_filter_trim_cap with a lower bound avoids
that: skb_vlan_tag_present(skb) ? sizeof(struct vlan_ethhdr) : 0;