[PATCH v2 1/7] KVM: arm64: Move hyp_pool locking out of refcount helpers

From: Quentin Perret
Date: Wed Jun 02 2021 - 05:44:00 EST


The hyp_page refcount helpers currently rely on the hyp_pool lock for
serialization. However, this means the refcounts can't be changed from
the buddy allocator core, which already holds the lock, so pages have
to go through odd transient states.

For example, when a page is freed, its refcount is set to 0, and the
lock is transiently released before the page can be attached to a free
list in the buddy tree. This is currently harmless as the allocator
checks the list node of each page to see if it is available for
allocation or not, but it means the page refcount can't be trusted to
represent the state of the page even if the pool lock is held.

In order to fix this, remove the pool locking from the refcount helpers,
and move all the logic to the buddy allocator. This will simplify the
removal of the list node from struct hyp_page in a later patch.

Signed-off-by: Quentin Perret <qperret@xxxxxxxxxx>
---
arch/arm64/kvm/hyp/include/nvhe/gfp.h | 35 ----------------------
arch/arm64/kvm/hyp/nvhe/page_alloc.c | 43 ++++++++++++++++++++-------
2 files changed, 32 insertions(+), 46 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index 18a4494337bd..f2c84e4fa40f 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -22,41 +22,6 @@ struct hyp_pool {
unsigned int max_order;
};

-static inline void hyp_page_ref_inc(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
-
- hyp_spin_lock(&pool->lock);
- p->refcount++;
- hyp_spin_unlock(&pool->lock);
-}
-
-static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
- int ret;
-
- hyp_spin_lock(&pool->lock);
- p->refcount--;
- ret = (p->refcount == 0);
- hyp_spin_unlock(&pool->lock);
-
- return ret;
-}
-
-static inline void hyp_set_page_refcounted(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
-
- hyp_spin_lock(&pool->lock);
- if (p->refcount) {
- hyp_spin_unlock(&pool->lock);
- BUG();
- }
- p->refcount = 1;
- hyp_spin_unlock(&pool->lock);
-}
-
/* Allocation */
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order);
void hyp_get_page(void *addr);
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 237e03bf0cb1..d666f4789e31 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -93,15 +93,6 @@ static void __hyp_attach_page(struct hyp_pool *pool,
list_add_tail(&p->node, &pool->free_area[order]);
}

-static void hyp_attach_page(struct hyp_page *p)
-{
- struct hyp_pool *pool = hyp_page_to_pool(p);
-
- hyp_spin_lock(&pool->lock);
- __hyp_attach_page(pool, p);
- hyp_spin_unlock(&pool->lock);
-}
-
static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
struct hyp_page *p,
unsigned int order)
@@ -125,19 +116,49 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
return p;
}

+static inline void hyp_page_ref_inc(struct hyp_page *p)
+{
+ p->refcount++;
+}
+
+static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
+{
+ p->refcount--;
+ return (p->refcount == 0);
+}
+
+static inline void hyp_set_page_refcounted(struct hyp_page *p)
+{
+ BUG_ON(p->refcount);
+ p->refcount = 1;
+}
+
+/*
+ * Changes to the buddy tree and page refcounts must be done with the hyp_pool
+ * lock held. If a refcount change requires an update to the buddy tree (e.g.
+ * hyp_put_page()), both operations must be done within the same critical
+ * section to guarantee transient states (e.g. a page with null refcount but
+ * not yet attached to a free list) can't be observed by well-behaved readers.
+ */
void hyp_put_page(void *addr)
{
struct hyp_page *p = hyp_virt_to_page(addr);
+ struct hyp_pool *pool = hyp_page_to_pool(p);

+ hyp_spin_lock(&pool->lock);
if (hyp_page_ref_dec_and_test(p))
- hyp_attach_page(p);
+ __hyp_attach_page(pool, p);
+ hyp_spin_unlock(&pool->lock);
}

void hyp_get_page(void *addr)
{
struct hyp_page *p = hyp_virt_to_page(addr);
+ struct hyp_pool *pool = hyp_page_to_pool(p);

+ hyp_spin_lock(&pool->lock);
hyp_page_ref_inc(p);
+ hyp_spin_unlock(&pool->lock);
}

void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
@@ -159,8 +180,8 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
p = list_first_entry(&pool->free_area[i], struct hyp_page, node);
p = __hyp_extract_page(pool, p, order);

- hyp_spin_unlock(&pool->lock);
hyp_set_page_refcounted(p);
+ hyp_spin_unlock(&pool->lock);

return hyp_page_to_virt(p);
}
--
2.32.0.rc0.204.g9fa02ecfa5-goog