[RFC PATCH net-next 10/12] vhost_net: build xdp buff
From: Jason Wang
Date: Mon May 21 2018 - 04:10:37 EST
This patch implement build XDP buffers in vhost_net. The idea is do
userspace copy in vhost_net and build XDP buff based on the
page. Vhost_net can then submit one or an array of XDP buffs to
underlayer socket (e.g TUN). TUN can choose to do XDP or call
build_skb() to build skb. To support build skb, vnet header were also
stored into the header of the XDP buff.
This userspace copy and XDP buffs building is key to achieve XDP
batching in TUN, since TUN does not need to care about userspace copy
and then can disable premmption for several XDP buffs to achieve
batching from XDP.
TODO: reserve headroom based on the TUN XDP.
Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>
---
drivers/vhost/net.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f0639d7..1209e84 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -492,6 +492,80 @@ static bool vhost_has_more_pkts(struct vhost_net *net,
likely(!vhost_exceeds_maxpend(net));
}
+#define VHOST_NET_HEADROOM 256
+#define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
+
+static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
+ struct iov_iter *from,
+ struct xdp_buff *xdp)
+{
+ struct vhost_virtqueue *vq = &nvq->vq;
+ struct page_frag *alloc_frag = ¤t->task_frag;
+ struct virtio_net_hdr *gso;
+ size_t len = iov_iter_count(from);
+ int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ int pad = SKB_DATA_ALIGN(VHOST_NET_RX_PAD + VHOST_NET_HEADROOM
+ + nvq->sock_hlen);
+ int sock_hlen = nvq->sock_hlen;
+ void *buf;
+ int copied;
+
+ if (len < nvq->sock_hlen)
+ return -EFAULT;
+
+ if (SKB_DATA_ALIGN(len + pad) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
+ return -ENOSPC;
+
+ buflen += SKB_DATA_ALIGN(len + pad);
+ alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
+ if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
+ return -ENOMEM;
+
+ buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+
+ /* We store two kinds of metadata in the header which will be
+ * used for XDP_PASS to do build_skb():
+ * offset 0: buflen
+ * offset sizeof(int): vnet header
+ */
+ copied = copy_page_from_iter(alloc_frag->page,
+ alloc_frag->offset + sizeof(int), sock_hlen, from);
+ if (copied != sock_hlen)
+ return -EFAULT;
+
+ gso = (struct virtio_net_hdr *)(buf + sizeof(int));
+
+ if ((gso->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
+ vhost16_to_cpu(vq, gso->csum_start) +
+ vhost16_to_cpu(vq, gso->csum_offset) + 2 >
+ vhost16_to_cpu(vq, gso->hdr_len)) {
+ gso->hdr_len = cpu_to_vhost16(vq,
+ vhost16_to_cpu(vq, gso->csum_start) +
+ vhost16_to_cpu(vq, gso->csum_offset) + 2);
+
+ if (vhost16_to_cpu(vq, gso->hdr_len) > len)
+ return -EINVAL;
+ }
+
+ len -= sock_hlen;
+ copied = copy_page_from_iter(alloc_frag->page,
+ alloc_frag->offset + pad,
+ len, from);
+ if (copied != len)
+ return -EFAULT;
+
+ xdp->data_hard_start = buf;
+ xdp->data = buf + pad;
+ xdp->data_end = xdp->data + len;
+ *(int *)(xdp->data_hard_start)= buflen;
+
+ get_page(alloc_frag->page);
+ alloc_frag->offset += buflen;
+
+ return 0;
+}
+
static void handle_tx_copy(struct vhost_net *net)
{
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
--
2.7.4