Re: [PATCH net-next 09/11] tuntap: accept an array of XDP buffs through sendmsg()

From: Michael S. Tsirkin
Date: Thu Sep 06 2018 - 13:51:09 EST


On Thu, Sep 06, 2018 at 12:05:24PM +0800, Jason Wang wrote:
> This patch implement TUN_MSG_PTR msg_control type. This type allows
> the caller to pass an array of XDP buffs to tuntap through ptr field
> of the tun_msg_control. If an XDP program is attached, tuntap can run
> XDP program directly. If not, tuntap will build skb and do a fast
> receiving since part of the work has been done by vhost_net.
>
> This will avoid lots of indirect calls thus improves the icache
> utilization and allows to do XDP batched flushing when doing XDP
> redirection.
>
> Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>

Is most of the benefit in batched flushing or skipping
indirect calls? Because if it's flushing we can gain
most of it easily by adding an analog of xmit_more.

> ---
> drivers/net/tun.c | 103 ++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 100 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index c839a4bdcbd9..069db2e5dd08 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -2424,22 +2424,119 @@ static void tun_sock_write_space(struct sock *sk)
> kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
> }
>
> +static int tun_xdp_one(struct tun_struct *tun,
> + struct tun_file *tfile,
> + struct xdp_buff *xdp, int *flush)
> +{
> + struct virtio_net_hdr *gso = xdp->data_hard_start + sizeof(int);
> + struct tun_pcpu_stats *stats;
> + struct bpf_prog *xdp_prog;
> + struct sk_buff *skb = NULL;
> + u32 rxhash = 0, act;
> + int buflen = *(int *)xdp->data_hard_start;
> + int err = 0;
> + bool skb_xdp = false;
> +
> + xdp_prog = rcu_dereference(tun->xdp_prog);
> + if (xdp_prog) {
> + if (gso->gso_type) {
> + skb_xdp = true;
> + goto build;
> + }
> + xdp_set_data_meta_invalid(xdp);
> + xdp->rxq = &tfile->xdp_rxq;
> + act = tun_do_xdp(tun, tfile, xdp_prog, xdp, &err);
> + if (err)
> + goto out;
> + if (act == XDP_REDIRECT)
> + *flush = true;
> + if (act != XDP_PASS)
> + goto out;
> + }
> +
> +build:
> + skb = build_skb(xdp->data_hard_start, buflen);
> + if (!skb) {
> + err = -ENOMEM;
> + goto out;
> + }
> +
> + skb_reserve(skb, xdp->data - xdp->data_hard_start);
> + skb_put(skb, xdp->data_end - xdp->data);
> +
> + if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
> + this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
> + kfree_skb(skb);
> + err = -EINVAL;
> + goto out;
> + }
> +
> + skb->protocol = eth_type_trans(skb, tun->dev);
> + skb_reset_network_header(skb);
> + skb_probe_transport_header(skb, 0);
> +
> + if (skb_xdp) {
> + err = do_xdp_generic(xdp_prog, skb);
> + if (err != XDP_PASS)
> + goto out;
> + }
> +
> + if (!rcu_dereference(tun->steering_prog))
> + rxhash = __skb_get_hash_symmetric(skb);
> +
> + netif_receive_skb(skb);
> +
> + stats = get_cpu_ptr(tun->pcpu_stats);
> + u64_stats_update_begin(&stats->syncp);
> + stats->rx_packets++;
> + stats->rx_bytes += skb->len;
> + u64_stats_update_end(&stats->syncp);
> + put_cpu_ptr(stats);
> +
> + if (rxhash)
> + tun_flow_update(tun, rxhash, tfile);
> +
> +out:
> + return err;
> +}
> +
> static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
> {
> - int ret;
> + int ret, i;
> struct tun_file *tfile = container_of(sock, struct tun_file, socket);
> struct tun_struct *tun = tun_get(tfile);
> struct tun_msg_ctl *ctl = m->msg_control;
> + struct xdp_buff *xdp;
>
> if (!tun)
> return -EBADFD;
>
> - if (ctl && ctl->type != TUN_MSG_UBUF)
> - return -EINVAL;
> + if (ctl && ((ctl->type & 0xF) == TUN_MSG_PTR)) {
> + int n = ctl->type >> 16;
> + int flush = 0;
> +
> + local_bh_disable();
> + rcu_read_lock();
> +
> + for (i = 0; i < n; i++) {
> + xdp = &((struct xdp_buff *)ctl->ptr)[i];
> + tun_xdp_one(tun, tfile, xdp, &flush);
> + }
> +
> + if (flush)
> + xdp_do_flush_map();
> +
> + rcu_read_unlock();
> + local_bh_enable();
> +
> + ret = total_len;
> + goto out;
> + }
>
> ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
> m->msg_flags & MSG_DONTWAIT,
> m->msg_flags & MSG_MORE);
> +out:
> tun_put(tun);
> return ret;
> }
> --
> 2.17.1