Re: [PATCH v3 09/12] KVM: VMX: Remove vmx->current_tsc_ratio and decache_tsc_multiplier()

From: Maxim Levitsky
Date: Mon May 24 2021 - 13:54:08 EST


On Fri, 2021-05-21 at 11:24 +0100, Ilias Stamatis wrote:
> The vmx->current_tsc_ratio field is redundant as
> vcpu->arch.tsc_scaling_ratio already tracks the current TSC scaling
> ratio. Removing this field makes decache_tsc_multiplier() a one-liner
> so remove that too and do a vmcs_write64() directly in order to be more
> consistent with surrounding code.
Not to mention that 'decache_tsc_multiplier' isn't a good name for what
this function does, IMHO.


>
> Signed-off-by: Ilias Stamatis <ilstam@xxxxxxxxxx>
> ---
> arch/x86/kvm/vmx/nested.c | 9 ++++-----
> arch/x86/kvm/vmx/vmx.c | 5 ++---
> arch/x86/kvm/vmx/vmx.h | 8 --------
> 3 files changed, 6 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 6058a65a6ede..239154d3e4e7 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2533,9 +2533,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> }
>
> vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
> -
> if (kvm_has_tsc_control)
> - decache_tsc_multiplier(vmx);
> + vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
>
> nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
>
> @@ -4501,12 +4500,12 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
> vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
> vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
> vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
> + if (kvm_has_tsc_control)
> + vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
> +
> if (vmx->nested.l1_tpr_threshold != -1)
> vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
>
> - if (kvm_has_tsc_control)
> - decache_tsc_multiplier(vmx);
> -
> if (vmx->nested.change_vmcs01_virtual_apic_mode) {
> vmx->nested.change_vmcs01_virtual_apic_mode = false;
> vmx_set_virtual_apic_mode(vcpu);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 4b70431c2edd..7c52c697cfe3 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1392,9 +1392,8 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
> }
>
> /* Setup TSC multiplier */
> - if (kvm_has_tsc_control &&
> - vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
> - decache_tsc_multiplier(vmx);
> + if (kvm_has_tsc_control)
> + vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);

This might add the overhead of writing the TSC scaling ratio even when
it is unchanged. I haven't measured how expensive vmreads/vmwrites are, but
at least when running nested, vmreads/vmwrites can be very expensive (if
they cause a vmexit).

I think this is why 'vmx->current_tsc_ratio' exists: it keeps a cached
copy of the TSC scaling ratio, so that the field doesn't have to be
'vmread' or 'vmwritten' when there is no need.


Best regards,
Maxim Levitsky

> }
>
> /*
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index aa97c82e3451..3eaa86a0ba3e 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -322,8 +322,6 @@ struct vcpu_vmx {
> /* apic deadline value in host tsc */
> u64 hv_deadline_tsc;
>
> - u64 current_tsc_ratio;
> -
> unsigned long host_debugctlmsr;
>
> /*
> @@ -532,12 +530,6 @@ static inline struct vmcs *alloc_vmcs(bool shadow)
> GFP_KERNEL_ACCOUNT);
> }
>
> -static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
> -{
> - vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
> - vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
> -}
> -
> static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
> {
> return vmx->secondary_exec_control &