Re: [PATCH v6 5/8] KVM: x86/mmu: Track the number of TDP MMU pages, but not the actual pages
From: Yan Zhao
Date: Fri Oct 21 2022 - 02:05:43 EST
Reviewed-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>
On Wed, Oct 19, 2022 at 04:56:15PM +0000, Sean Christopherson wrote:
> Track the number of TDP MMU "shadow" pages instead of tracking the pages
> themselves. With the NX huge page list manipulation moved out of the common
> linking flow, elminating the list-based tracking means the happy path of
> adding a shadow page doesn't need to acquire a spinlock and can instead
> inc/dec an atomic.
>
> Keep the tracking as the WARN during TDP MMU teardown on leaked shadow
> pages is very, very useful for detecting KVM bugs.
>
> Tracking the number of pages will also make it trivial to expose the
> counter to userspace as a stat in the future, which may or may not be
> desirable.
>
> Note, the TDP MMU needs to use a separate counter (and stat if that ever
> comes to be) from the existing n_used_mmu_pages. The TDP MMU doesn't bother
> supporting the shrinker nor does it honor KVM_SET_NR_MMU_PAGES (because the
> TDP MMU consumes so few pages relative to shadow paging), and including TDP
> MMU pages in that counter would break both the shrinker and shadow MMUs,
> e.g. if a VM is using nested TDP.
>
> Cc: Yan Zhao <yan.y.zhao@xxxxxxxxx>
> Reviewed-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
> Reviewed-by: David Matlack <dmatlack@xxxxxxxxxx>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
> arch/x86/include/asm/kvm_host.h | 11 +++--------
> arch/x86/kvm/mmu/tdp_mmu.c | 20 +++++++++-----------
> 2 files changed, 12 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 0333dbb8ec85..bbd2cecd34cb 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1290,6 +1290,9 @@ struct kvm_arch {
> */
> bool tdp_mmu_enabled;
>
> + /* The number of TDP MMU pages across all roots. */
> + atomic64_t tdp_mmu_pages;
> +
> /*
> * List of kvm_mmu_page structs being used as roots.
> * All kvm_mmu_page structs in the list should have
> @@ -1310,18 +1313,10 @@ struct kvm_arch {
> */
> struct list_head tdp_mmu_roots;
>
> - /*
> - * List of kvm_mmu_page structs not being used as roots.
> - * All kvm_mmu_page structs in the list should have
> - * tdp_mmu_page set and a tdp_mmu_root_count of 0.
> - */
> - struct list_head tdp_mmu_pages;
> -
> /*
> * Protects accesses to the following fields when the MMU lock
> * is held in read mode:
> * - tdp_mmu_roots (above)
> - * - tdp_mmu_pages (above)
> * - the link field of kvm_mmu_page structs used by the TDP MMU
> * - possible_nx_huge_pages;
> * - the possible_nx_huge_page_link field of kvm_mmu_page structs used
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 059231c82345..4e5b3ae824c1 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -29,7 +29,6 @@ int kvm_mmu_init_tdp_mmu(struct kvm *kvm)
> kvm->arch.tdp_mmu_enabled = true;
> INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
> spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
> - INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
> kvm->arch.tdp_mmu_zap_wq = wq;
> return 1;
> }
> @@ -54,7 +53,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
> /* Also waits for any queued work items. */
> destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
>
> - WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
> + WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
> WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
>
> /*
> @@ -377,11 +376,13 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
> static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> {
> kvm_account_pgtable_pages((void *)sp->spt, +1);
> + atomic64_inc(&kvm->arch.tdp_mmu_pages);
> }
>
> static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> {
> kvm_account_pgtable_pages((void *)sp->spt, -1);
> + atomic64_dec(&kvm->arch.tdp_mmu_pages);
> }
>
> /**
> @@ -397,17 +398,17 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
> bool shared)
> {
> tdp_unaccount_mmu_page(kvm, sp);
> +
> + if (!sp->nx_huge_page_disallowed)
> + return;
> +
> if (shared)
> spin_lock(&kvm->arch.tdp_mmu_pages_lock);
> else
> lockdep_assert_held_write(&kvm->mmu_lock);
>
> - list_del(&sp->link);
> -
> - if (sp->nx_huge_page_disallowed) {
> - sp->nx_huge_page_disallowed = false;
> - untrack_possible_nx_huge_page(kvm, sp);
> - }
> + sp->nx_huge_page_disallowed = false;
> + untrack_possible_nx_huge_page(kvm, sp);
>
> if (shared)
> spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
> @@ -1140,9 +1141,6 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
> tdp_mmu_set_spte(kvm, iter, spte);
> }
>
> - spin_lock(&kvm->arch.tdp_mmu_pages_lock);
> - list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
> - spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
> tdp_account_mmu_page(kvm, sp);
>
> return 0;
> --
> 2.38.0.413.g74048e4d9e-goog
>