Re: [PATCH v6 6/8] KVM: Handle page fault for private memory
From: Sean Christopherson
Date: Fri Jun 17 2022 - 17:31:10 EST
On Thu, May 19, 2022, Chao Peng wrote:
> @@ -4028,8 +4081,11 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
> if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
> return true;
>
> - return fault->slot &&
> - mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
> + if (fault->is_private)
> + return mmu_notifier_retry(vcpu->kvm, mmu_seq);
Hmm, this is somewhat undesirable, because faulting in private pfns will be blocked
by unrelated mmu_notifier updates. The issue is mitigated to some degree by bumping
the sequence count if and only if overlap with a memslot is detected, e.g. mapping
changes that affect only userspace won't block the guest.
It probably won't be an issue, but at the same time it's easy to solve, and I don't
like piggybacking on mmu_notifier_seq, as private mappings shouldn't be subject to the
mmu_notifier.
That would also fix a theoretical bug in this patch where mmu_notifier_retry()
wouldn't be defined if CONFIG_MEMFILE_NOTIFIER=y && CONFIG_MMU_NOTIFIER=n.
---
arch/x86/kvm/mmu/mmu.c | 11 ++++++-----
include/linux/kvm_host.h | 16 +++++++++++-----
virt/kvm/kvm_main.c | 2 +-
3 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0b455c16ec64..a4cbd29433e7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4100,10 +4100,10 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
return true;
if (fault->is_private)
- return mmu_notifier_retry(vcpu->kvm, mmu_seq);
- else
- return fault->slot &&
- mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
+ return memfile_notifier_retry(vcpu->kvm, mmu_seq);
+
+ return fault->slot &&
+ mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
}
static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -4127,7 +4127,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
if (r)
return r;
- mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ mmu_seq = fault->is_private ? vcpu->kvm->memfile_notifier_seq :
+ vcpu->kvm->mmu_notifier_seq;
smp_rmb();
r = kvm_faultin_pfn(vcpu, fault);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 92afa5bddbc5..31f704c83099 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -773,16 +773,15 @@ struct kvm {
struct hlist_head irq_ack_notifier_list;
#endif
-#if (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)) ||\
- defined(CONFIG_MEMFILE_NOTIFIER)
+#if (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER))
unsigned long mmu_notifier_seq;
-#endif
-
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
struct mmu_notifier mmu_notifier;
long mmu_notifier_count;
unsigned long mmu_notifier_range_start;
unsigned long mmu_notifier_range_end;
+#endif
+#ifdef CONFIG_MEMFILE_NOTIFIER
+ unsigned long memfile_notifier_seq;
#endif
struct list_head devices;
u64 manual_dirty_log_protect;
@@ -1964,6 +1963,13 @@ static inline int mmu_notifier_retry_hva(struct kvm *kvm,
}
#endif
+#ifdef CONFIG_MEMFILE_NOTIFIER
+static inline bool memfile_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
+{
+ return kvm->memfile_notifier_seq != mmu_seq;
+}
+#endif
+
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
#define KVM_MAX_IRQ_ROUTES 4096 /* might need extension/rework in the future */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2b416d3bd60e..e6d34c964d51 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -898,7 +898,7 @@ static void kvm_private_mem_notifier_handler(struct memfile_notifier *notifier,
KVM_MMU_LOCK(kvm);
if (kvm_unmap_gfn_range(kvm, &gfn_range))
kvm_flush_remote_tlbs(kvm);
- kvm->mmu_notifier_seq++;
+ kvm->memfile_notifier_seq++;
KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
}
base-commit: 333ef501c7f6c6d4ef2b7678905cad0f8ef3e271
--
> + else
> + return fault->slot &&
> + mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
> }
>
> static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> @@ -4088,7 +4144,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> read_unlock(&vcpu->kvm->mmu_lock);
> else
> write_unlock(&vcpu->kvm->mmu_lock);
> - kvm_release_pfn_clean(fault->pfn);
> +
> + if (fault->is_private)
> + kvm_private_mem_put_pfn(fault->slot, fault->pfn);
Why does the shmem path lock the page, and then unlock it here?
Same question for why this path marks the page dirty. The guest has the page mapped,
so the dirty flag is immediately stale.
In other words, why does KVM need to do something different for private pfns?
> + else
> + kvm_release_pfn_clean(fault->pfn);
> +
> return r;
> }
>
...
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 7f8f1c8dbed2..1d857919a947 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -878,7 +878,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>
> out_unlock:
> write_unlock(&vcpu->kvm->mmu_lock);
> - kvm_release_pfn_clean(fault->pfn);
> + if (fault->is_private)
Indirect MMUs can't support private faults, i.e. this is unnecessary.
> + kvm_private_mem_put_pfn(fault->slot, fault->pfn);
> + else
> + kvm_release_pfn_clean(fault->pfn);
> return r;
> }
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 3fd168972ecd..b0a7910505ed 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2241,4 +2241,26 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
> /* Max number of entries allowed for each kvm dirty ring */
> #define KVM_DIRTY_RING_MAX_ENTRIES 65536
>
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +static inline int kvm_private_mem_get_pfn(struct kvm_memory_slot *slot,
> + gfn_t gfn, kvm_pfn_t *pfn, int *order)
> +{
> + int ret;
> + pfn_t pfnt;
> + pgoff_t index = gfn - slot->base_gfn +
> + (slot->private_offset >> PAGE_SHIFT);
> +
> + ret = slot->notifier.bs->get_lock_pfn(slot->private_file, index, &pfnt,
> + order);
> + *pfn = pfn_t_to_pfn(pfnt);
> + return ret;
> +}
> +
> +static inline void kvm_private_mem_put_pfn(struct kvm_memory_slot *slot,
> + kvm_pfn_t pfn)
> +{
> + slot->notifier.bs->put_unlock_pfn(pfn_to_pfn_t(pfn));
> +}
> +#endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
> +
> #endif
> --
> 2.25.1
>