Re: [PATCH net-next 3/5] virtio_net: Add page pool fragmentation support

From: Liang Chen
Date: Sat May 27 2023 - 08:36:48 EST


On Fri, May 26, 2023 at 4:29 PM Horatiu Vultur
<horatiu.vultur@xxxxxxxxxxxxx> wrote:
>
> The 05/26/2023 13:46, Liang Chen wrote:
>
> Hi Liang,
>
> >
> > To further enhance performance, implement page pool fragmentation
> > support and introduce a module parameter to enable or disable it.
> >
> > In single-core vm testing environments, there is an additional performance
> > gain observed in the normal path compared to the one packet per page
> > approach.
> > Upstream codebase: 47.5 Gbits/sec
> > Upstream codebase with page pool: 50.2 Gbits/sec
> > Upstream codebase with page pool fragmentation support: 52.3 Gbits/sec
> >
> > There is also some performance gain for XDP cpumap.
> > Upstream codebase: 1.38 Gbits/sec
> > Upstream codebase with page pool: 9.74 Gbits/sec
> > Upstream codebase with page pool fragmentation: 10.3 Gbits/sec
> >
> > Signed-off-by: Liang Chen <liangchen.linux@xxxxxxxxx>
> > ---
> > drivers/net/virtio_net.c | 72 ++++++++++++++++++++++++++++++----------
> > 1 file changed, 55 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 99c0ca0c1781..ac40b8c66c59 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -32,7 +32,9 @@ module_param(gso, bool, 0444);
> > module_param(napi_tx, bool, 0644);
> >
> > static bool page_pool_enabled;
> > +static bool page_pool_frag;
> > module_param(page_pool_enabled, bool, 0400);
> > +module_param(page_pool_frag, bool, 0400);
> >
> > /* FIXME: MTU in config. */
> > #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
> > @@ -909,23 +911,32 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
> > struct page *p,
> > int offset,
> > int page_off,
> > - unsigned int *len)
> > + unsigned int *len,
> > + unsigned int *pp_frag_offset)
>
> The 'unsigned int *pp_frag_offset' parameter seems to be misaligned.
>

Sure, Thanks!
> > {
> > int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> > struct page *page;
> > + unsigned int pp_frag_offset_val;
>
> Please use reverse Christmas tree notation here. The pp_frag_offset_val
> needs to be declared before page.
>

Sure. Will do in v2.
> >
> > if (page_off + *len + tailroom > PAGE_SIZE)
> > return NULL;
> >
> > if (rq->page_pool)
> > - page = page_pool_dev_alloc_pages(rq->page_pool);
> > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
> > + page = page_pool_dev_alloc_frag(rq->page_pool, pp_frag_offset,
> > + PAGE_SIZE);
>
> Don't you need to check whether pp_frag_offset is NULL, given that one
> caller passes NULL?
>

At the moment, page_pool is enabled only for mergeable mode, and the
only path that passes a NULL pp_frag_offset comes from small mode.
However, I will re-evaluate for v2 whether it is beneficial to support
page_pool in small mode. Thanks.
> > + else
> > + page = page_pool_dev_alloc_pages(rq->page_pool);
> > else
> > page = alloc_page(GFP_ATOMIC);
> >
> > if (!page)
> > return NULL;
> >
> > - memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
> > + pp_frag_offset_val = pp_frag_offset ? *pp_frag_offset : 0;
> > +
> > + memcpy(page_address(page) + page_off + pp_frag_offset_val,
> > + page_address(p) + offset, *len);
> > page_off += *len;
> >
> > while (--*num_buf) {
> > @@ -948,7 +959,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
> > goto err_buf;
> > }
> >
> > - memcpy(page_address(page) + page_off,
> > + memcpy(page_address(page) + page_off + pp_frag_offset_val,
> > page_address(p) + off, buflen);
> > page_off += buflen;
> > virtnet_put_page(rq, p);
> > @@ -1029,7 +1040,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
> > SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> > xdp_page = xdp_linearize_page(rq, &num_buf, page,
> > offset, header_offset,
> > - &tlen);
> > + &tlen, NULL);
> > if (!xdp_page)
> > goto err_xdp;
> >
> > @@ -1323,6 +1334,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> > unsigned int headroom = mergeable_ctx_to_headroom(ctx);
> > struct page *xdp_page;
> > unsigned int xdp_room;
> > + unsigned int page_frag_offset = 0;
>
> Please use reverse x-mas tree notation.
>

Sure. Will do in v2.
> >
> > /* Transient failure which in theory could occur if
> > * in-flight packets from before XDP was enabled reach
> > @@ -1356,7 +1368,8 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> > xdp_page = xdp_linearize_page(rq, num_buf,
> > *page, offset,
> > VIRTIO_XDP_HEADROOM,
> > - len);
> > + len,
> > + &page_frag_offset);
>
> There is also some misalignment here with regard to page_frag_offset.
>

Sure, Thanks!
> > if (!xdp_page)
> > return NULL;
> > } else {
> > @@ -1366,14 +1379,19 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> > return NULL;
> >
> > if (rq->page_pool)
> > - xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
> > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
> > + xdp_page = page_pool_dev_alloc_frag(rq->page_pool,
> > + &page_frag_offset, PAGE_SIZE);
> > + else
> > + xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
> > else
> > xdp_page = alloc_page(GFP_ATOMIC);
> > +
> > if (!xdp_page)
> > return NULL;
> >
> > - memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
> > - page_address(*page) + offset, *len);
> > + memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM +
> > + page_frag_offset, page_address(*page) + offset, *len);
> > }
> >
> > *frame_sz = PAGE_SIZE;
> > @@ -1382,7 +1400,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
> >
> > *page = xdp_page;
> >
> > - return page_address(*page) + VIRTIO_XDP_HEADROOM;
> > + return page_address(*page) + VIRTIO_XDP_HEADROOM + page_frag_offset;
> > }
> >
> > static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
> > @@ -1762,6 +1780,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
> > void *ctx;
> > int err;
> > unsigned int len, hole;
> > + unsigned int pp_frag_offset;
>
> The same here.
>

Sure, Thanks!

> >
> > /* Extra tailroom is needed to satisfy XDP's assumption. This
> > * means rx frags coalescing won't work, but consider we've
> > @@ -1769,13 +1788,29 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
> > */
> > len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
> > if (rq->page_pool) {
> > - struct page *page;
> > + if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG) {
> > + if (unlikely(!page_pool_dev_alloc_frag(rq->page_pool,
> > + &pp_frag_offset, len + room)))
> > + return -ENOMEM;
> > + buf = (char *)page_address(rq->page_pool->frag_page) +
> > + pp_frag_offset;
> > + buf += headroom; /* advance address leaving hole at front of pkt */
> > + hole = (PAGE_SIZE << rq->page_pool->p.order)
> > + - rq->page_pool->frag_offset;
> > + if (hole < len + room) {
> > + if (!headroom)
> > + len += hole;
> > + rq->page_pool->frag_offset += hole;
> > + }
> > + } else {
> > + struct page *page;
> >
> > - page = page_pool_dev_alloc_pages(rq->page_pool);
> > - if (unlikely(!page))
> > - return -ENOMEM;
> > - buf = (char *)page_address(page);
> > - buf += headroom; /* advance address leaving hole at front of pkt */
> > + page = page_pool_dev_alloc_pages(rq->page_pool);
> > + if (unlikely(!page))
> > + return -ENOMEM;
> > + buf = (char *)page_address(page);
> > + buf += headroom; /* advance address leaving hole at front of pkt */
> > + }
> > } else {
> > if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
> > return -ENOMEM;
> > @@ -3800,13 +3835,16 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq)
> > struct virtio_device *vdev = rq->vq->vdev;
> >
> > struct page_pool_params pp_params = {
> > - .order = 0,
> > + .order = page_pool_frag ? SKB_FRAG_PAGE_ORDER : 0,
> > .pool_size = rq->vq->num_max,
> > .nid = dev_to_node(vdev->dev.parent),
> > .dev = vdev->dev.parent,
> > .offset = 0,
> > };
> >
> > + if (page_pool_frag)
> > + pp_params.flags |= PP_FLAG_PAGE_FRAG;
> > +
> > rq->page_pool = page_pool_create(&pp_params);
> > if (IS_ERR(rq->page_pool)) {
> > dev_warn(&vdev->dev, "page pool creation failed: %ld\n",
> > --
> > 2.31.1
> >
> >
>
> --
> /Horatiu