Re: [PATCH v6 03/38] KVM: x86: hyper-v: Introduce TLB flush fifo

From: Maxim Levitsky
Date: Tue Jun 07 2022 - 04:58:12 EST


On Mon, 2022-06-06 at 10:36 +0200, Vitaly Kuznetsov wrote:
> To allow flushing individual GVAs instead of always flushing the
> whole
> VPID a per-vCPU structure to pass the requests is needed. Use
> standard
> 'kfifo' to queue two types of entries: individual GVA (GFN + up to
> 4095
> following GFNs in the lower 12 bits) and 'flush all'.

Honestly I still don't think I understand why we can't just
raise KVM_REQ_TLB_FLUSH_GUEST when the guest uses this interface
to flush everthing, and then we won't need to touch the ring
at all.

But I am just curious - if there is a reason for that,
then no objections from my side.


>
> The size of the fifo is arbitrary set to '16'.
>
> Note, kvm_hv_flush_tlb() only queues 'flush all' entries for now and
> kvm_hv_vcpu_flush_tlb() doesn't actually read the fifo just resets
> the
> queue before doing full TLB flush so the functional change is very
> small but the infrastructure is prepared to handle individual GVA
> flush requests.
>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h | 20 +++++++++++++++
>  arch/x86/kvm/hyperv.c           | 45
> +++++++++++++++++++++++++++++++++
>  arch/x86/kvm/hyperv.h           | 16 ++++++++++++
>  arch/x86/kvm/x86.c              |  8 +++---
>  arch/x86/kvm/x86.h              |  1 +
>  5 files changed, 86 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> index 7a6a6f47b603..cf3748be236d 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -25,6 +25,7 @@
>  #include <linux/clocksource.h>
>  #include <linux/irqbypass.h>
>  #include <linux/hyperv.h>
> +#include <linux/kfifo.h>
>  
>  #include <asm/apic.h>
>  #include <asm/pvclock-abi.h>
> @@ -600,6 +601,23 @@ struct kvm_vcpu_hv_synic {
>         bool dont_zero_synic_pages;
>  };
>  
> +/* The maximum number of entries on the TLB flush fifo. */
> +#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16)
> +/*
> + * Note: the following 'magic' entry is made up by KVM to avoid
> putting
> + * anything besides GVA on the TLB flush fifo. It is theoretically
> possible
> + * to observe a request to flush 4095 PFNs starting from
> 0xfffffffffffff000
> + * which will look identical. KVM's action to 'flush everything'
> instead of
> + * flushing these particular addresses is, however, fully legitimate
> as
> + * flushing more than requested is always OK.
> + */
> +#define KVM_HV_TLB_FLUSHALL_ENTRY  ((u64)-1)
> +
> +struct kvm_vcpu_hv_tlb_flush_fifo {
> +       spinlock_t write_lock;
> +       DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE);
> +};
> +
>  /* Hyper-V per vcpu emulation context */
>  struct kvm_vcpu_hv {
>         struct kvm_vcpu *vcpu;
> @@ -619,6 +637,8 @@ struct kvm_vcpu_hv {
>                 u32 enlightenments_ebx; /*
> HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */
>                 u32 syndbg_cap_eax; /*
> HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
>         } cpuid_cache;
> +
> +       struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo;
>  };
>  
>  /* Xen HVM per vcpu emulation context */
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index b402ad059eb9..c8b22bf67577 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -29,6 +29,7 @@
>  #include <linux/kvm_host.h>
>  #include <linux/highmem.h>
>  #include <linux/sched/cputime.h>
> +#include <linux/spinlock.h>
>  #include <linux/eventfd.h>
>  
>  #include <asm/apicdef.h>
> @@ -954,6 +955,9 @@ static int kvm_hv_vcpu_init(struct kvm_vcpu
> *vcpu)
>  
>         hv_vcpu->vp_index = vcpu->vcpu_idx;
>  
> +       INIT_KFIFO(hv_vcpu->tlb_flush_fifo.entries);
> +       spin_lock_init(&hv_vcpu->tlb_flush_fifo.write_lock);
> +
>         return 0;
>  }
>  
> @@ -1789,6 +1793,35 @@ static u64 kvm_get_sparse_vp_set(struct kvm
> *kvm, struct kvm_hv_hcall *hc,
>                               var_cnt * sizeof(*sparse_banks));
>  }
>  
> +static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
> +       struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> +       u64 entry = KVM_HV_TLB_FLUSHALL_ENTRY;
> +
> +       if (!hv_vcpu)
> +               return;
> +
> +       tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
> +
> +       kfifo_in_spinlocked(&tlb_flush_fifo->entries, &entry, 1,
> &tlb_flush_fifo->write_lock);
Tiny nitpick: the 'tlb_flush_fifo' isn't really neede here I think,
but probably will be needed in later patches, so feel free to ignore.


> +}
> +
> +void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
> +       struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> +
> +       kvm_vcpu_flush_tlb_guest(vcpu);
> +
> +       if (!hv_vcpu)
> +               return;
> +
> +       tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
> +
> +       kfifo_reset_out(&tlb_flush_fifo->entries);
Same here.
> +}
> +
>  static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct
> kvm_hv_hcall *hc)
>  {
>         struct kvm *kvm = vcpu->kvm;
> @@ -1797,6 +1830,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu
> *vcpu, struct kvm_hv_hcall *hc)
>         DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
>         u64 valid_bank_mask;
>         u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
> +       struct kvm_vcpu *v;
> +       unsigned long i;
>         bool all_cpus;
>  
>         /*
> @@ -1876,10 +1911,20 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu
> *vcpu, struct kvm_hv_hcall *hc)
>          * analyze it here, flush TLB regardless of the specified
> address space.
>          */
>         if (all_cpus) {
> +               kvm_for_each_vcpu(i, v, kvm)
> +                       hv_tlb_flush_enqueue(v);
> +
>                 kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH);
>         } else {
>                 sparse_set_to_vcpu_mask(kvm, sparse_banks,
> valid_bank_mask, vcpu_mask);
>  
> +               for_each_set_bit(i, vcpu_mask, KVM_MAX_VCPUS) {
> +                       v = kvm_get_vcpu(kvm, i);
> +                       if (!v)
> +                               continue;
> +                       hv_tlb_flush_enqueue(v);
> +               }
> +
>                 kvm_make_vcpus_request_mask(kvm,
> KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
>         }
>  
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index da2737f2a956..e5b32266ff7d 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -147,4 +147,20 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm,
> struct kvm_hyperv_eventfd *args);
>  int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2
> *cpuid,
>                      struct kvm_cpuid_entry2 __user *entries);
>  
> +
> +static inline void kvm_hv_vcpu_empty_flush_tlb(struct kvm_vcpu
> *vcpu)
> +{
> +       struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
> +       struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> +
> +       if (!hv_vcpu || !kvm_check_request(KVM_REQ_HV_TLB_FLUSH,
> vcpu))
> +               return;
> +
> +       tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
> +
> +       kfifo_reset_out(&tlb_flush_fifo->entries);
> +}
> +void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
> +
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 80cd3eb5e7de..805db43c2829 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3320,7 +3320,7 @@ static void kvm_vcpu_flush_tlb_all(struct
> kvm_vcpu *vcpu)
>         static_call(kvm_x86_flush_tlb_all)(vcpu);
>  }
>  
> -static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
> +void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
>  {
>         ++vcpu->stat.tlb_flush;
>  
> @@ -3355,14 +3355,14 @@ void
> kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
>  {
>         if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) {
>                 kvm_vcpu_flush_tlb_current(vcpu);
> -               kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
> +               kvm_hv_vcpu_empty_flush_tlb(vcpu);
>         }
>  
>         if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) {
>                 kvm_vcpu_flush_tlb_guest(vcpu);
> -               kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
> +               kvm_hv_vcpu_empty_flush_tlb(vcpu);
>         } else if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) {
> -               kvm_vcpu_flush_tlb_guest(vcpu);
> +               kvm_hv_vcpu_flush_tlb(vcpu);
>         }
>  }
>  EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 501b884b8cc4..9f7989f2c6d4 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -79,6 +79,7 @@ static inline unsigned int
> __shrink_ple_window(unsigned int val,
>  
>  #define MSR_IA32_CR_PAT_DEFAULT  0x0007040600070406ULL
>  
> +void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu);
>  void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
>  int kvm_check_nested_events(struct kvm_vcpu *vcpu);
>  

Best regards,
Maxim Levitsky