Re: [RFC PATCH net-next 10/12] vhost_net: build xdp buff

From: Jason Wang
Date: Tue May 22 2018 - 07:47:10 EST




On 2018å05æ22æ 00:56, Jesse Brandeburg wrote:
On Mon, 21 May 2018 17:04:31 +0800 Jason wrote:
This patch implement build XDP buffers in vhost_net. The idea is do
userspace copy in vhost_net and build XDP buff based on the
page. Vhost_net can then submit one or an array of XDP buffs to
underlayer socket (e.g TUN). TUN can choose to do XDP or call
build_skb() to build skb. To support build skb, vnet header were also
stored into the header of the XDP buff.

This userspace copy and XDP buffs building is key to achieve XDP
batching in TUN, since TUN does not need to care about userspace copy
and then can disable premmption for several XDP buffs to achieve
batching from XDP.

TODO: reserve headroom based on the TUN XDP.

Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>
---
drivers/vhost/net.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f0639d7..1209e84 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -492,6 +492,80 @@ static bool vhost_has_more_pkts(struct vhost_net *net,
likely(!vhost_exceeds_maxpend(net));
}
+#define VHOST_NET_HEADROOM 256
+#define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
+
+static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
+ struct iov_iter *from,
+ struct xdp_buff *xdp)
+{
+ struct vhost_virtqueue *vq = &nvq->vq;
+ struct page_frag *alloc_frag = &current->task_frag;
+ struct virtio_net_hdr *gso;
+ size_t len = iov_iter_count(from);
+ int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ int pad = SKB_DATA_ALIGN(VHOST_NET_RX_PAD + VHOST_NET_HEADROOM
+ + nvq->sock_hlen);
+ int sock_hlen = nvq->sock_hlen;
+ void *buf;
+ int copied;
+
+ if (len < nvq->sock_hlen)
+ return -EFAULT;
+
+ if (SKB_DATA_ALIGN(len + pad) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
+ return -ENOSPC;
+
+ buflen += SKB_DATA_ALIGN(len + pad);
maybe store the result of SKB_DATA_ALIGN in a local instead of doing
the work twice?

Ok.


+ alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
+ if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
+ return -ENOMEM;
+
+ buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+
+ /* We store two kinds of metadata in the header which will be
+ * used for XDP_PASS to do build_skb():
+ * offset 0: buflen
+ * offset sizeof(int): vnet header
+ */
+ copied = copy_page_from_iter(alloc_frag->page,
+ alloc_frag->offset + sizeof(int), sock_hlen, from);
+ if (copied != sock_hlen)
+ return -EFAULT;
+
+ gso = (struct virtio_net_hdr *)(buf + sizeof(int));
+
+ if ((gso->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
+ vhost16_to_cpu(vq, gso->csum_start) +
+ vhost16_to_cpu(vq, gso->csum_offset) + 2 >
+ vhost16_to_cpu(vq, gso->hdr_len)) {
+ gso->hdr_len = cpu_to_vhost16(vq,
+ vhost16_to_cpu(vq, gso->csum_start) +
+ vhost16_to_cpu(vq, gso->csum_offset) + 2);
+
+ if (vhost16_to_cpu(vq, gso->hdr_len) > len)
+ return -EINVAL;
+ }
+
+ len -= sock_hlen;
+ copied = copy_page_from_iter(alloc_frag->page,
+ alloc_frag->offset + pad,
+ len, from);
+ if (copied != len)
+ return -EFAULT;
+
+ xdp->data_hard_start = buf;
+ xdp->data = buf + pad;
+ xdp->data_end = xdp->data + len;
+ *(int *)(xdp->data_hard_start)= buflen;
space before =

Yes.

Thanks


+
+ get_page(alloc_frag->page);
+ alloc_frag->offset += buflen;
+
+ return 0;
+}
+
static void handle_tx_copy(struct vhost_net *net)
{
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];