Re: [PATCH v2 3/4] Defer skb allocation -- new recvbuf alloc & receive calls

From: Michael S. Tsirkin
Date: Sun Dec 13 2009 - 06:46:24 EST


On Fri, Dec 11, 2009 at 04:46:53AM -0800, Shirley Ma wrote:
> Signed-off-by: Shirley Ma <xma@xxxxxxxxxx>
> -------------
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 100b4b9..dde8060 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -203,6 +203,73 @@ static struct sk_buff *skb_goodcopy(struct virtnet_info *vi, struct page **page,
> return skb;
> }
>
> +static struct sk_buff *receive_big(struct virtnet_info *vi, struct page *page,
> + unsigned int len)
> +{
> + struct sk_buff *skb;
> +
> + skb = skb_goodcopy(vi, &page, &len);
> + if (unlikely(!skb))
> + return NULL;
> +
> + while (len > 0) {
> + len = skb_set_frag(skb, page, 0, len);
> + page = (struct page *)page->private;

Interesting. I think skb_goodcopy will sometimes
set *page to NULL. Will the above crash then?

> + }
> +
> + if (page)
> + give_pages(vi, page);
> +
> + return skb;
> +}
> +
> +static struct sk_buff *receive_mergeable(struct virtnet_info *vi,
> + struct page *page, unsigned int len)
> +{
> + struct sk_buff *skb;
> + struct skb_vnet_hdr *hdr;
> + int num_buf, i;
> +
> + if (len > PAGE_SIZE)
> + len = PAGE_SIZE;
> +
> + skb = skb_goodcopy(vi, &page, &len);
> +

Don't put an empty line here: the if below is part of the same logical
block as the skb_goodcopy call.

> + if (unlikely(!skb))
> + return NULL;

don't we care that *page might not be NULL? why not?

> +
> + hdr = skb_vnet_hdr(skb);
> + num_buf = hdr->mhdr.num_buffers;
> + while (--num_buf) {
> + struct page *page;

Local variable shadows a parameter.
It seems gcc will let you get away with a warning,
but this is not legal C.

> +
> + i = skb_shinfo(skb)->nr_frags;
> + if (i >= MAX_SKB_FRAGS) {
> + pr_debug("%s: packet too long %d\n", skb->dev->name,
> + len);


If this happens, we have corrupted memory already.
We do need this check, but please put it before you increment
nr_frags.

> + skb->dev->stats.rx_length_errors++;
> + return skb;

This will propagate the error up the stack and corrupt
more memory.

> + }
> +
> + page = vi->rvq->vq_ops->get_buf(vi->rvq, &len);
> + if (!page) {
> + pr_debug("%s: rx error: %d buffers missing\n",
> + skb->dev->name, hdr->mhdr.num_buffers);
> + skb->dev->stats.rx_length_errors++;
> + return skb;

Here, skb is some random part of the packet; don't propagate
it up the stack.

> + }
> +
> + if (len > PAGE_SIZE)
> + len = PAGE_SIZE;
> +
> + skb_set_frag(skb, page, 0, len);
> +
> + vi->num--;
> + }
> +
> + return skb;
> +}
> +
> static void receive_skb(struct net_device *dev, struct sk_buff *skb,
> unsigned len)
> {
> @@ -356,6 +423,103 @@ drop:
> dev_kfree_skb(skb);
> }
>
> +static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp, bool *oom)
> +{
> + struct sk_buff *skb;
> + struct skb_vnet_hdr *hdr;
> + struct scatterlist sg[2];
> + int err = 0;
> +
> + skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN + NET_IP_ALIGN);
> + if (unlikely(!skb)) {
> + *oom = true;
> + return err;
> + }
> +
> + skb_reserve(skb, NET_IP_ALIGN);
> + skb_put(skb, MAX_PACKET_LEN);
> +
> + hdr = skb_vnet_hdr(skb);
> + sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr));

sizeof hdr->hdr

> +
> + skb_to_sgvec(skb, sg+1, 0, skb->len);

space around +

> +
> + err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 2, skb);
> + if (err < 0)
> + kfree_skb(skb);
> + else
> + skb_queue_head(&vi->recv, skb);

So why are we queueing this still?

> +
> + return err;
> +}
> +
> +static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp, bool *oom)
> +{
> + struct scatterlist sg[2 + MAX_SKB_FRAGS];

MAX_SKB_FRAGS + 2 will be more readable.
Also, create a macro for this constant and document
why +2 makes sense.

> + int total = MAX_SKB_FRAGS + 2;
> + char *p;
> + int err = 0;
> + int i, offset;
> + struct page *first = NULL;
> + struct page *page;
> + /* share one page between virtio_net header and data */
> + struct padded_vnet_hdr {
> + struct virtio_net_hdr hdr;
> + /* This padding makes our data 16 byte aligned */
> + char padding[6];

Again, please explain *why* we want 16-byte alignment.
Also, this code seems duplicated?
Please put structs at the top of the file where they
can be found.

> + };
> +
> + offset = sizeof(struct padded_vnet_hdr);
> +
> + for (i = total - 1; i > 0; i--) {

I prefer --i.
Also, total is just a constant,
so simply MAX_SKB_FRAGS + 1 will be clearer.
Why do we scan last to first?
If there's a reason, please add a comment.

> + page = get_a_page(vi, gfp);
> + if (!page) {
> + if (first)
> + give_pages(vi, first);
> + *oom = true;
> + break;
> + }
> +
> + p = page_address(page);
> + page->private = (unsigned long)first;
> + first = page;
> +
> + /* allocate MAX_SKB_FRAGS + 1 pages for big packets */
> + if (i == 1) {
> + sg_set_buf(&sg[i-1], p, sizeof(struct virtio_net_hdr));

space around - .
All the if (i == 1) handling on exit is really hard to grok.
How about moving common code out of this loop
into a function, and then you can
for (i = total - 1; i > 1; i--) {
handle(i);
}
handle(1);
handle(0);
add_buf




> + sg_set_buf(&sg[i], p + offset, PAGE_SIZE - offset);
> + err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, total,
> + first);
> + if (err < 0)
> + give_pages(vi, first);
> + } else
> + sg_set_buf(&sg[i], p, PAGE_SIZE);
> + }
> +
> + return err;
> +}
> +
> +static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp, bool *oom)


do we really need *oom here and below?
We can just set err to ENOMEM, no?

> +{
> + struct page *page;
> + struct scatterlist sg;
> + int err = 0;
> +
> + page = get_a_page(vi, gfp);
> + if (!page) {
> + *oom = true;
> + return err;

Please do not return 0 on failure.

> + }
> +
> + sg_init_one(&sg, page_address(page), PAGE_SIZE);
> +
> + err = vi->rvq->vq_ops->add_buf(vi->rvq, &sg, 0, 1, page);
> + if (err < 0)
> + give_pages(vi, page);
> +
> + return err;
> +}
> +
> static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp)
> {
> struct sk_buff *skb;
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/