On Thu, Apr 13, 2017 at 05:35:05PM +0800, Wei Wang wrote:Right. bitmap is the way to gather pages to chunk.
So we don't need the bitmap to talk to host, it is just
a data structure we chose to maintain lists of pages, right?
OK as far as it goes but you need much better isolation for it."#define PAGE_CHUNK_TYPE_UNUSED 1" is added in another patch.
Build a data structure with APIs such as _init, _cleanup, _add, _clear,
_find_first, _find_next.
Completely unrelated to pages, it just maintains bits.
Then use it here.
static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;Doesn't look like you are ever adding more types in this
module_param(oom_pages, int, S_IRUSR | S_IWUSR);
MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
@@ -50,6 +54,10 @@ MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
static struct vfsmount *balloon_mnt;
#endif
+/* Types of pages to chunk */
+#define PAGE_CHUNK_TYPE_BALLOON 0
+
patchset. Pls keep code simple, generalize it later.
+#define MAX_PAGE_CHUNKS 4096This is an order-4 allocation. I'd make it 4095 and then it's
an order-3 one.
struct virtio_balloon {cover? what does this mean?
struct virtio_device *vdev;
struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
@@ -78,6 +86,32 @@ struct virtio_balloon {
/* Synchronize access/update to this struct virtio_balloon elements */
struct mutex balloon_lock;
+ /*
+ * Buffer for PAGE_CHUNK_TYPE_BALLOON:
+ * virtio_balloon_page_chunk_hdr +
+ * virtio_balloon_page_chunk * MAX_PAGE_CHUNKS
+ */
+ struct virtio_balloon_page_chunk_hdr *balloon_page_chunk_hdr;
+ struct virtio_balloon_page_chunk *balloon_page_chunk;
+
+ /* Bitmap used to record pages */
+ unsigned long *page_bmap[PAGE_BMAP_COUNT_MAX];
+ /* Number of the allocated page_bmap */
+ unsigned int page_bmaps;
+
+ /*
+ * The allocated page_bmap size may be smaller than the pfn range of
+ * the ballooned pages. In this case, we need to use the page_bmap
+ * multiple times to cover the entire pfn range. It's like using a
+ * short ruler several times to finish measuring a long object.
+ * The start location of the ruler in the next measurement is the end
+ * location of the ruler in the previous measurement.
+ *
+ * pfn_max & pfn_min: forms the pfn range of the ballooned pages
+ * pfn_start & pfn_stop: records the start and stop pfn in each cover
looks like you only use these to pass data to tell_host.
so pass these as parameters and you won't need to keep
them in this structure.
And then you can move this comment to set_page_bmap where
it belongs.
+ */did you mean
+ unsigned long pfn_min, pfn_max, pfn_start, pfn_stop;
+
/* The array of pfns we tell the Host about. */
unsigned int num_pfns;
__virtio32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
@@ -110,20 +144,201 @@ static void balloon_ack(struct virtqueue *vq)
wake_up(&vb->acked);
}
-static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
+static inline void init_page_bmap_range(struct virtio_balloon *vb)
+{
+ vb->pfn_min = ULONG_MAX;
+ vb->pfn_max = 0;
+}
+
+static inline void update_page_bmap_range(struct virtio_balloon *vb,
+ struct page *page)
+{
+ unsigned long balloon_pfn = page_to_balloon_pfn(page);
+
+ vb->pfn_min = min(balloon_pfn, vb->pfn_min);
+ vb->pfn_max = max(balloon_pfn, vb->pfn_max);
+}
+
+/* The page_bmap size is extended by adding more number of page_bmap */
Allocate more bitmaps to cover the given number of pfns
and add them to page_bmap
?
This isn't what this function does.
It blindly assumes 1 bitmap is allocated
and allocates more, up to PAGE_BMAP_COUNT_MAX.
+static void extend_page_bmap_size(struct virtio_balloon *vb,Align? PAGE_BMAP_SIZE doesn't even have to be a power of 2 ...
+ unsigned long pfns)
+{
+ int i, bmaps;
+ unsigned long bmap_len;
+
+ bmap_len = ALIGN(pfns, BITS_PER_LONG) / BITS_PER_BYTE;
+ bmap_len = ALIGN(bmap_len, PAGE_BMAP_SIZE);
+ bmaps = min((int)(bmap_len / PAGE_BMAP_SIZE),I got lost here.
+ PAGE_BMAP_COUNT_MAX);
Please use things like ARRAY_SIZE instead of macros.
+What's the magic number 1 here?
+ for (i = 1; i < bmaps; i++) {
+ vb->page_bmap[i] = kmalloc(PAGE_BMAP_SIZE, GFP_KERNEL);
+ if (vb->page_bmap[i])
+ vb->page_bmaps++;
+ else
+ break;
+ }
+}
+
+static void free_extended_page_bmap(struct virtio_balloon *vb)
+{
+ int i, bmaps = vb->page_bmaps;
+
+ for (i = 1; i < bmaps; i++) {
+ kfree(vb->page_bmap[i]);
+ vb->page_bmap[i] = NULL;
+ vb->page_bmaps--;
+ }
+}
+
Maybe you want to document what is going on.
Here's a guess:
We keep a single bmap around at all times.
If memory does not fit there, we allocate up to
PAGE_BMAP_COUNT_MAX of chunks.
+static void free_page_bmap(struct virtio_balloon *vb)busy_wait seems unused. pls drop.
+{
+ int i;
+
+ for (i = 0; i < vb->page_bmaps; i++)
+ kfree(vb->page_bmap[i]);
+}
+
+static void clear_page_bmap(struct virtio_balloon *vb)
+{
+ int i;
+
+ for (i = 0; i < vb->page_bmaps; i++)
+ memset(vb->page_bmap[i], 0, PAGE_BMAP_SIZE);
+}
+
+static void send_page_chunks(struct virtio_balloon *vb, struct virtqueue *vq,
+ int type, bool busy_wait)
For the unused page chunk case, it follows its own protocol:
{Moving back to before the header? How can this make sense?
struct scatterlist sg;
+ struct virtio_balloon_page_chunk_hdr *hdr;
+ void *buf;
unsigned int len;
- sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
+ switch (type) {
+ case PAGE_CHUNK_TYPE_BALLOON:
+ hdr = vb->balloon_page_chunk_hdr;
+ len = 0;
+ break;
+ default:
+ dev_warn(&vb->vdev->dev, "%s: chunk %d of unknown pages\n",
+ __func__, type);
+ return;
+ }
- /* We should always be able to add one buffer to an empty queue. */
- virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL);
- virtqueue_kick(vq);
+ buf = (void *)hdr - len;
It works fine since len is 0, so just buf = hdr.
+ len += sizeof(struct virtio_balloon_page_chunk_hdr);Why zero it here after device used it? Better to zero before use.
+ len += hdr->chunks * sizeof(struct virtio_balloon_page_chunk);
+ sg_init_table(&sg, 1);
+ sg_set_buf(&sg, buf, len);
+ if (!virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL)) {
+ virtqueue_kick(vq);
+ if (busy_wait)
+ while (!virtqueue_get_buf(vq, &len) &&
+ !virtqueue_is_broken(vq))
+ cpu_relax();
+ else
+ wait_event(vb->acked, virtqueue_get_buf(vq, &len));
+ hdr->chunks = 0;
+ }what are the units here? Looks like it's in 4kbyte units?
+}
+
+static void add_one_chunk(struct virtio_balloon *vb, struct virtqueue *vq,
+ int type, u64 base, u64 size)
+ if (hdr->chunks == MAX_PAGE_CHUNKS)and zero chunks here?
+ send_page_chunks(vb, vq, type, false);
+}Does this mean "convert_bmap_to_chunks"?
+
+static void chunking_pages_from_bmap(struct virtio_balloon *vb,
+ struct virtqueue *vq,
+ unsigned long pfn_start,
+ unsigned long *bmap,
+ unsigned long len)
+{
+ unsigned long pos = 0, end = len * BITS_PER_BYTE;
+
+ while (pos < end) {
+ unsigned long one = find_next_bit(bmap, end, pos);
+
+ if (one < end) {
+ unsigned long chunk_size, zero;
+
+ zero = find_next_zero_bit(bmap, end, one + 1);
zero and one are unhelpful names unless they equal 0 and 1.
current/next?
A bit in the bitmap corresponds to a pfn of a balloon page(4KB).+ if (zero >= end)Still not so what does a bit refer to? page or 4kbytes?
+ chunk_size = end - one;
+ else
+ chunk_size = zero - one;
+
+ if (chunk_size)
+ add_one_chunk(vb, vq, PAGE_CHUNK_TYPE_BALLOON,
+ pfn_start + one, chunk_size);
I think it should be a page.
Still use the ruler analogy here: the object is 11-meter long, and we have+static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)I don't understand what does this mean.
+{
+ if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_BALLOON_CHUNKS)) {
+ int pfns, page_bmaps, i;
+ unsigned long pfn_start, pfns_len;
+
+ pfn_start = vb->pfn_start;
+ pfns = vb->pfn_stop - pfn_start + 1;
+ pfns = roundup(roundup(pfns, BITS_PER_LONG),
+ PFNS_PER_PAGE_BMAP);
+ page_bmaps = pfns / PFNS_PER_PAGE_BMAP;
+ pfns_len = pfns / BITS_PER_BYTE;
+
+ for (i = 0; i < page_bmaps; i++) {
+ unsigned int bmap_len = PAGE_BMAP_SIZE;
+
+ /* The last one takes the leftover only */
+static void set_page_bmap(struct virtio_balloon *vb,This might not do anything in particular might not cover the
+ struct list_head *pages, struct virtqueue *vq)
+{
+ unsigned long pfn_start, pfn_stop;
+ struct page *page;
+ bool found;
+
+ vb->pfn_min = rounddown(vb->pfn_min, BITS_PER_LONG);
+ vb->pfn_max = roundup(vb->pfn_max, BITS_PER_LONG);
+
+ extend_page_bmap_size(vb, vb->pfn_max - vb->pfn_min + 1);
given pfn range. Do we care? Why not?
+ pfn_start = vb->pfn_min;Looks like this will crash if bmap_idx is out of range or
+
+ while (pfn_start < vb->pfn_max) {
+ pfn_stop = pfn_start + PFNS_PER_PAGE_BMAP * vb->page_bmaps;
+ pfn_stop = pfn_stop < vb->pfn_max ? pfn_stop : vb->pfn_max;
+
+ vb->pfn_start = pfn_start;
+ clear_page_bmap(vb);
+ found = false;
+
+ list_for_each_entry(page, pages, lru) {
+ unsigned long bmap_idx, bmap_pos, balloon_pfn;
+
+ balloon_pfn = page_to_balloon_pfn(page);
+ if (balloon_pfn < pfn_start || balloon_pfn > pfn_stop)
+ continue;
+ bmap_idx = (balloon_pfn - pfn_start) /
+ PFNS_PER_PAGE_BMAP;
+ bmap_pos = (balloon_pfn - pfn_start) %
+ PFNS_PER_PAGE_BMAP;
+ set_bit(bmap_pos, vb->page_bmap[bmap_idx]);
if page_bmap allocation failed.
#ifdef CONFIG_BALLOON_COMPACTION
+
+static void tell_host_one_page(struct virtio_balloon *vb,
+ struct virtqueue *vq, struct page *page)
+{
+ add_one_chunk(vb, vq, PAGE_CHUNK_TYPE_BALLOON, page_to_pfn(page), 1);
This passes 4kbytes to host which seems wrong - I think you want a full page.
this doesn't work as expected as features has been OK'd by then.
You want something like
validate_features that I posted. See
"virtio: allow drivers to validate features".
+ kfree(vb->page_bmap[0]);Looks like this will double free. you want to zero them I think.