[RFC v2 PATCH 07/13] KVM: Handle page fault for fd based memslot

From: Chao Peng
Date: Fri Nov 19 2021 - 08:49:38 EST


The current code assumes that private memory is persistent, so KVM can
check with the backing store whether private memory exists at a given
address by calling get_pfn(alloc=false).

Signed-off-by: Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx>
Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
---
arch/x86/kvm/mmu/mmu.c | 75 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 40377901598b..cd5d1f923694 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3277,6 +3277,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
if (max_level == PG_LEVEL_4K)
return PG_LEVEL_4K;

+ if (memslot_is_memfd(slot))
+ return max_level;
+
host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot);
return min(host_level, max_level);
}
@@ -4555,6 +4558,65 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}

+/*
+ * Resolve the pfn for a page fault on a memfd-backed memslot.
+ *
+ * The slot's private file is probed (alloc=false) to learn whether a private
+ * page already exists for the gfn, and the result is compared with the kind
+ * of access that faulted:
+ *
+ *  - private access, slot has no private file: *r = RET_PF_INVALID
+ *  - private access, no private page present:  KVM_EXIT_MEM_MAP_PRIVATE exit
+ *  - shared access, private page present:      KVM_EXIT_MEM_MAP_SHARED exit
+ *  - otherwise the private pfn, or a shared pfn obtained with alloc=true,
+ *    is stored in fault->pfn.
+ *
+ * Returns true after setting *r when no pfn was obtained, false when
+ * fault->pfn has been filled in.  In the latter case the pfn is still held
+ * via get_pfn() and has to be dropped with memfd_ops->put_pfn() once the
+ * fault has been handled.
+ */
+static bool kvm_faultin_pfn_memfd(struct kvm_vcpu *vcpu,
+				  struct kvm_page_fault *fault, int *r)
+{
+	struct kvm_memory_slot *slot = fault->slot;
+	bool priv_gfn = kvm_vcpu_is_private_gfn(vcpu, fault->addr >> PAGE_SHIFT);
+	bool priv_slot_exists = memslot_has_private(slot);
+	bool priv_gfn_exists = false;
+	int mem_convert_type;
+	int order;
+	/*
+	 * Deliberately signed: kvm_pfn_t is unsigned, so ">= 0" / "< 0" tests
+	 * on it can never detect a failed lookup.
+	 * NOTE(review): assumes get_pfn() reports failure as a negative
+	 * value - confirm against the memfd_ops definition.
+	 */
+	long pfn = -1;
+
+	/* A private access cannot be served by a slot without a private file. */
+	if (priv_gfn && !priv_slot_exists) {
+		*r = RET_PF_INVALID;
+		return true;
+	}
+
+	/* Probe, without allocating, for an existing private page. */
+	if (priv_slot_exists) {
+		pfn = slot->memfd_ops->get_pfn(slot, slot->priv_file,
+					       fault->gfn, false, &order);
+		if (pfn >= 0)
+			priv_gfn_exists = true;
+	}
+
+	if (priv_gfn && !priv_gfn_exists) {
+		mem_convert_type = KVM_EXIT_MEM_MAP_PRIVATE;
+		goto out_convert;
+	}
+
+	if (!priv_gfn && priv_gfn_exists) {
+		/* Drop the pfn taken by the probe; it will not be mapped. */
+		slot->memfd_ops->put_pfn(pfn);
+		mem_convert_type = KVM_EXIT_MEM_MAP_SHARED;
+		goto out_convert;
+	}
+
+	if (!priv_gfn) {
+		/* Shared access: take the page from the shared file, allocating. */
+		pfn = slot->memfd_ops->get_pfn(slot, slot->file,
+					       fault->gfn, true, &order);
+		if (pfn < 0) {
+			*r = RET_PF_INVALID;
+			return true;
+		}
+	}
+
+	/* Publish the pfn so the rest of the fault path maps and releases it. */
+	fault->pfn = pfn;
+	if (slot->flags & KVM_MEM_READONLY)
+		fault->map_writable = false;
+	if (order == 0)
+		fault->max_level = PG_LEVEL_4K;
+
+	return false;
+
+out_convert:
+	/* Describe the private/shared mismatch in vcpu->run and fail the fault. */
+	vcpu->run->exit_reason = KVM_EXIT_MEMORY_ERROR;
+	vcpu->run->mem.type = mem_convert_type;
+	vcpu->run->mem.u.map.gpa = fault->gfn << PAGE_SHIFT;
+	vcpu->run->mem.u.map.size = PAGE_SIZE;
+	fault->pfn = -1;
+	*r = -1;
+	return true;
+}
+
static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, int *r)
{
struct kvm_memory_slot *slot = fault->slot;
@@ -4596,6 +4658,9 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
}
}

+ if (memslot_is_memfd(slot))
+ return kvm_faultin_pfn_memfd(vcpu, fault, r);
+
async = false;
fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, &async,
fault->write, &fault->map_writable,
@@ -4660,7 +4725,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
else
write_lock(&vcpu->kvm->mmu_lock);

- if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva))
+ if (fault->slot && !memslot_is_memfd(fault->slot) &&
+ mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva))
goto out_unlock;
r = make_mmu_pages_available(vcpu);
if (r)
@@ -4676,7 +4742,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
read_unlock(&vcpu->kvm->mmu_lock);
else
write_unlock(&vcpu->kvm->mmu_lock);
- kvm_release_pfn_clean(fault->pfn);
+
+ if (memslot_is_memfd(fault->slot))
+ fault->slot->memfd_ops->put_pfn(fault->pfn);
+ else
+ kvm_release_pfn_clean(fault->pfn);
+
return r;
}

--
2.17.1