Re: [PATCH 2/6] virtio-balloon: speed up inflate/deflate process

From: Michael S. Tsirkin
Date: Fri Jun 24 2016 - 01:40:28 EST


On Mon, Jun 13, 2016 at 05:47:09PM +0800, Liang Li wrote:
> The implementation of the current virtio-balloon is not very efficient.
> Below is the test result of the time spent on inflating the balloon to 3GB of
> a 4GB idle guest:
>
> a. allocating pages (6.5%, 103ms)
> b. sending PFNs to host (68.3%, 787ms)
> c. address translation (6.1%, 96ms)
> d. madvise (19%, 300ms)
>
> It takes about 1577ms for the whole inflating process to complete. The
> test shows that the bottlenecks are stage b and stage d.
>
> If using a bitmap to send the page info instead of the PFNs, we can
> reduce the overhead in stage b quite a lot. Furthermore, it's possible
> to do the address translation and the madvise with a bulk of pages,
> instead of the current page per page way, so the overhead of stage c
> and stage d can also be reduced a lot.
>
> This patch is the kernel side implementation which is intended to speed
> up the inflating & deflating process by adding a new feature to the
> virtio-balloon device. And now, inflating the balloon to 3GB of a 4GB
> idle guest only takes 200ms, it's about 8 times as fast as before.
>
> TODO: optimize stage a by allocating/freeing a chunk of pages instead
> of a single page at a time.
>
> Signed-off-by: Liang Li <liang.z.li@xxxxxxxxx>
> Suggested-by: Michael S. Tsirkin <mst@xxxxxxxxxx>
> Cc: Michael S. Tsirkin <mst@xxxxxxxxxx>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Cornelia Huck <cornelia.huck@xxxxxxxxxx>
> Cc: Amit Shah <amit.shah@xxxxxxxxxx>

This causes kbuild warnings.

> ---
> drivers/virtio/virtio_balloon.c | 164 +++++++++++++++++++++++++++++++-----
> include/uapi/linux/virtio_balloon.h | 1 +
> 2 files changed, 144 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 8d649a2..1fa601b 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -40,11 +40,19 @@
> #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
> #define OOM_VBALLOON_DEFAULT_PAGES 256
> #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
> +#define VIRTIO_BALLOON_PFNS_LIMIT ((2 * (1ULL << 30)) >> PAGE_SHIFT) /* 2GB */

2 << 30 is 2G, but that is not a useful comment.
Please explain the reason for this selection.

>
> static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
> module_param(oom_pages, int, S_IRUSR | S_IWUSR);
> MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
>
> +struct balloon_bmap_hdr {
> + __virtio32 id;
> + __virtio32 page_shift;
> + __virtio64 start_pfn;
> + __virtio64 bmap_len;
> +};
> +

Put this in an uapi header please.

> struct virtio_balloon {
> struct virtio_device *vdev;
> struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
> @@ -62,6 +70,11 @@ struct virtio_balloon {
>
> /* Number of balloon pages we've told the Host we're not using. */
> unsigned int num_pages;
> + /* Bitmap and length used to tell the host the pages */
> + unsigned long *page_bitmap;
> + unsigned long bmap_len;
> + /* Used to record the processed pfn range */
> + unsigned long min_pfn, max_pfn, start_pfn, end_pfn;
> /*
> * The pages we've told the Host we're not using are enqueued
> * at vb_dev_info->pages list.
> @@ -105,15 +118,51 @@ static void balloon_ack(struct virtqueue *vq)
> wake_up(&vb->acked);
> }
>
> +static inline void init_pfn_range(struct virtio_balloon *vb)
> +{
> + vb->min_pfn = (1UL << 48);

Where does this value come from? Do you want ULONG_MAX?
1UL << 48 does not fit in an unsigned long on 32-bit systems.


> + vb->max_pfn = 0;
> +}
> +
> +static inline void update_pfn_range(struct virtio_balloon *vb,
> + struct page *page)
> +{
> + unsigned long balloon_pfn = page_to_balloon_pfn(page);
> +
> + if (balloon_pfn < vb->min_pfn)
> + vb->min_pfn = balloon_pfn;
> + if (balloon_pfn > vb->max_pfn)
> + vb->max_pfn = balloon_pfn;
> +}
> +
> static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
> {
> - struct scatterlist sg;
> unsigned int len;
>
> - sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP)) {
> + struct balloon_bmap_hdr hdr;

Why not initialize the fields here, at the declaration?

> + unsigned long bmap_len;

Same here: why not initialize it at the declaration?

> + struct scatterlist sg[2];
> +
> + hdr.id = cpu_to_virtio32(vb->vdev, 0);
> + hdr.page_shift = cpu_to_virtio32(vb->vdev, PAGE_SHIFT);
> + hdr.start_pfn = cpu_to_virtio64(vb->vdev, vb->start_pfn);
> + bmap_len = min(vb->bmap_len,
> + (vb->end_pfn - vb->start_pfn) / BITS_PER_BYTE);
> + hdr.bmap_len = cpu_to_virtio64(vb->vdev, bmap_len);
> + sg_init_table(sg, 2);
> + sg_set_buf(&sg[0], &hdr, sizeof(hdr));
> + sg_set_buf(&sg[1], vb->page_bitmap, bmap_len);
> + virtqueue_add_outbuf(vq, sg, 2, vb, GFP_KERNEL);

This might fail if the queue size is < 2. Validate the queue size and
clear VIRTIO_BALLOON_F_PAGE_BITMAP if it is too small?

Alternatively, and I think preferably,
use the first sizeof(struct balloon_bmap_hdr) bytes of the buffer
to pass the header to the host.


> + } else {
> + struct scatterlist sg;
>
> - /* We should always be able to add one buffer to an empty queue. */
> - virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
> + sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> + /* We should always be able to add one buffer to an
> + * empty queue.
> + */
> + virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
> + }
> virtqueue_kick(vq);
>
> /* When host has read buffer, this completes via balloon_ack */
> @@ -133,13 +182,50 @@ static void set_page_pfns(struct virtio_balloon *vb,
> page_to_balloon_pfn(page) + i);
> }
>
> -static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
> +static void set_page_bitmap(struct virtio_balloon *vb,
> + struct list_head *pages, struct virtqueue *vq)
> +{
> + unsigned long pfn;
> + struct page *page, *next;
> + bool find;

find -> found

> +
> + vb->min_pfn = rounddown(vb->min_pfn, BITS_PER_LONG);
> + vb->max_pfn = roundup(vb->max_pfn, BITS_PER_LONG);
> + for (pfn = vb->min_pfn; pfn < vb->max_pfn;
> + pfn += VIRTIO_BALLOON_PFNS_LIMIT) {
> + vb->start_pfn = pfn;
> + vb->end_pfn = pfn;
> + memset(vb->page_bitmap, 0, vb->bmap_len);
> + find = false;
> + list_for_each_entry_safe(page, next, pages, lru) {

Why _safe?

> + unsigned long balloon_pfn = page_to_balloon_pfn(page);
> +
> + if (balloon_pfn < pfn ||
> + balloon_pfn >= pfn + VIRTIO_BALLOON_PFNS_LIMIT)
> + continue;
> + set_bit(balloon_pfn - pfn, vb->page_bitmap);
> + if (balloon_pfn > vb->end_pfn)
> + vb->end_pfn = balloon_pfn;
> + find = true;

Maybe remove the page from the list? This way we won't go over the same
entry multiple times ...

> + }
> + if (find) {
> + vb->end_pfn = roundup(vb->end_pfn, BITS_PER_LONG);
> + tell_host(vb, vq);
> + }
> + }
> +}
> +
> +static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num,
> + bool use_bmap)
> {
> struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> unsigned num_allocated_pages;
>
> - /* We can only do one array worth at a time. */
> - num = min(num, ARRAY_SIZE(vb->pfns));
> + if (use_bmap)
> + init_pfn_range(vb);
> + else
> + /* We can only do one array worth at a time. */
> + num = min(num, ARRAY_SIZE(vb->pfns));
>
> mutex_lock(&vb->balloon_lock);
> for (vb->num_pfns = 0; vb->num_pfns < num;
> @@ -154,7 +240,10 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
> msleep(200);
> break;
> }
> - set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
> + if (use_bmap)
> + update_pfn_range(vb, page);
> + else
> + set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
> vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
> if (!virtio_has_feature(vb->vdev,
> VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> @@ -163,8 +252,13 @@ static unsigned fill_balloon(struct virtio_balloon *vb, size_t num)
>
> num_allocated_pages = vb->num_pfns;
> /* Did we get any? */
> - if (vb->num_pfns != 0)
> - tell_host(vb, vb->inflate_vq);
> + if (vb->num_pfns != 0) {
> + if (use_bmap)
> + set_page_bitmap(vb, &vb_dev_info->pages,
> + vb->inflate_vq);

Don't we need pages_lock if we access vb_dev_info->pages?

> + else
> + tell_host(vb, vb->inflate_vq);
> + }
> mutex_unlock(&vb->balloon_lock);
>
> return num_allocated_pages;
> @@ -184,15 +278,19 @@ static void release_pages_balloon(struct virtio_balloon *vb,
> }
> }
>
> -static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
> +static unsigned int leak_balloon(struct virtio_balloon *vb, size_t num,
> + bool use_bmap)
> {
> unsigned num_freed_pages;
> struct page *page;
> struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info;
> LIST_HEAD(pages);
>
> - /* We can only do one array worth at a time. */
> - num = min(num, ARRAY_SIZE(vb->pfns));
> + if (use_bmap)
> + init_pfn_range(vb);
> + else
> + /* We can only do one array worth at a time. */
> + num = min(num, ARRAY_SIZE(vb->pfns));
>
> mutex_lock(&vb->balloon_lock);
> for (vb->num_pfns = 0; vb->num_pfns < num;
> @@ -200,7 +298,10 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
> page = balloon_page_dequeue(vb_dev_info);
> if (!page)
> break;
> - set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
> + if (use_bmap)
> + update_pfn_range(vb, page);
> + else
> + set_page_pfns(vb, vb->pfns + vb->num_pfns, page);
> list_add(&page->lru, &pages);
> vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
> }
> @@ -211,9 +312,14 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
> * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
> * is true, we *have* to do it in this order
> */
> - if (vb->num_pfns != 0)
> - tell_host(vb, vb->deflate_vq);
> - release_pages_balloon(vb, &pages);
> + if (vb->num_pfns != 0) {
> + if (use_bmap)
> + set_page_bitmap(vb, &pages, vb->deflate_vq);
> + else
> + tell_host(vb, vb->deflate_vq);
> +
> + release_pages_balloon(vb, &pages);
> + }
> mutex_unlock(&vb->balloon_lock);
> return num_freed_pages;
> }
> @@ -347,13 +453,15 @@ static int virtballoon_oom_notify(struct notifier_block *self,
> struct virtio_balloon *vb;
> unsigned long *freed;
> unsigned num_freed_pages;
> + bool use_bmap;
>
> vb = container_of(self, struct virtio_balloon, nb);
> if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> return NOTIFY_OK;
>
> freed = parm;
> - num_freed_pages = leak_balloon(vb, oom_pages);
> + use_bmap = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
> + num_freed_pages = leak_balloon(vb, oom_pages, use_bmap);
> update_balloon_size(vb);
> *freed += num_freed_pages;
>
> @@ -373,15 +481,17 @@ static void update_balloon_size_func(struct work_struct *work)
> {
> struct virtio_balloon *vb;
> s64 diff;
> + bool use_bmap;
>
> vb = container_of(work, struct virtio_balloon,
> update_balloon_size_work);
> + use_bmap = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
> diff = towards_target(vb);
>
> if (diff > 0)
> - diff -= fill_balloon(vb, diff);
> + diff -= fill_balloon(vb, diff, use_bmap);
> else if (diff < 0)
> - diff += leak_balloon(vb, -diff);
> + diff += leak_balloon(vb, -diff, use_bmap);
> update_balloon_size(vb);
>
> if (diff)
> @@ -508,6 +618,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
> spin_lock_init(&vb->stop_update_lock);
> vb->stop_update = false;
> vb->num_pages = 0;
> + vb->bmap_len = ALIGN(VIRTIO_BALLOON_PFNS_LIMIT, BITS_PER_LONG) /
> + BITS_PER_BYTE + 2 * sizeof(unsigned long);
> + vb->page_bitmap = kzalloc(vb->bmap_len, GFP_KERNEL);
> + if (!vb->page_bitmap) {
> + err = -ENOMEM;
> + goto out;
> + }

How about we clear the bitmap feature on this failure?

> mutex_init(&vb->balloon_lock);
> init_waitqueue_head(&vb->acked);
> vb->vdev = vdev;
> @@ -541,9 +658,12 @@ out:
>
> static void remove_common(struct virtio_balloon *vb)
> {
> + bool use_bmap;
> +
> + use_bmap = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_PAGE_BITMAP);
> /* There might be pages left in the balloon: free them. */
> while (vb->num_pages)
> - leak_balloon(vb, vb->num_pages);
> + leak_balloon(vb, vb->num_pages, use_bmap);
> update_balloon_size(vb);
>
> /* Now we reset the device so we can clean up the queues. */
> @@ -565,6 +685,7 @@ static void virtballoon_remove(struct virtio_device *vdev)
> cancel_work_sync(&vb->update_balloon_stats_work);
>
> remove_common(vb);
> + kfree(vb->page_bitmap);
> kfree(vb);
> }
>
> @@ -603,6 +724,7 @@ static unsigned int features[] = {
> VIRTIO_BALLOON_F_MUST_TELL_HOST,
> VIRTIO_BALLOON_F_STATS_VQ,
> VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
> + VIRTIO_BALLOON_F_PAGE_BITMAP,
> };
>
> static struct virtio_driver virtio_balloon_driver = {
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index 343d7dd..f78fa47 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -34,6 +34,7 @@
> #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
> #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */
> #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */
> +#define VIRTIO_BALLOON_F_PAGE_BITMAP 3 /* Send page info with bitmap */
>
> /* Size of a PFN in the balloon interface. */
> #define VIRTIO_BALLOON_PFN_SHIFT 12
> --
> 1.9.1