[PATCH 03/14] KVM: arm64: Continue stage-2 map when re-creating mappings
From: Quentin Perret
Date: Mon Jul 19 2021 - 06:47:55 EST
The stage-2 map walkers currently return -EAGAIN when re-creating
identical mappings or only changing access permissions. This allows
optimizing the mapping of pages when concurrent (v)CPUs fault on the
same page.
While this works as expected when touching one page-table leaf at a
time, this can lead to difficult situations when mapping larger ranges.
Indeed, a large map operation can fail in the middle if an existing
mapping is found in the range, even if it has compatible attributes,
hence leaving the range only partially mapped.
To avoid having to deal with such failures in the caller, don't
interrupt the map operation when hitting existing PTEs, but make sure to
still return -EAGAIN so that user_mem_abort() can mark the page dirty
when needed.
Cc: Yanan Wang <wangyanan55@xxxxxxxxxx>
Signed-off-by: Quentin Perret <qperret@xxxxxxxxxx>
---
arch/arm64/include/asm/kvm_pgtable.h | 2 +-
arch/arm64/kvm/hyp/pgtable.c | 21 +++++++++++++++++----
2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index d6649352c8b3..af62203d2f7a 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -258,7 +258,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
* If device attributes are not explicitly requested in @prot, then the
* mapping will be normal, cacheable.
*
- * Note that the update of a valid leaf PTE in this function will be aborted,
+ * Note that the update of a valid leaf PTE in this function will be skipped,
* if it's trying to recreate the exact same mapping or only change the access
* permissions. Instead, the vCPU will exit one more time from guest if still
* needed and then go through the path of relaxing permissions.
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 978f341d02ca..bb73c5331b7c 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -475,6 +475,8 @@ struct stage2_map_data {
void *memcache;
struct kvm_pgtable_mm_ops *mm_ops;
+
+ int ret;
};
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
@@ -612,8 +614,10 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
* the vCPU will exit one more time from guest if still needed
* and then go through the path of relaxing permissions.
*/
- if (!stage2_pte_needs_update(old, new))
- return -EAGAIN;
+ if (!stage2_pte_needs_update(old, new)) {
+ data->ret = -EAGAIN;
+ goto out;
+ }
stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
}
@@ -629,6 +633,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
smp_store_release(ptep, new);
if (stage2_pte_is_counted(new))
mm_ops->get_page(ptep);
+out:
if (kvm_phys_is_valid(phys))
data->phys += granule;
return 0;
@@ -771,6 +776,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
.mmu = pgt->mmu,
.memcache = mc,
.mm_ops = pgt->mm_ops,
+ .ret = 0,
};
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
@@ -789,7 +795,10 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
dsb(ishst);
- return ret;
+ if (ret)
+ return ret;
+
+ return map_data.ret;
}
int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
@@ -802,6 +811,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
.memcache = mc,
.mm_ops = pgt->mm_ops,
.owner_id = owner_id,
+ .ret = 0,
};
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
@@ -815,7 +825,10 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
return -EINVAL;
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
- return ret;
+ if (ret)
+ return ret;
+
+ return map_data.ret;
}
static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
--
2.32.0.402.g57bb445576-goog